diff --git a/db/column_family_test.cc b/db/column_family_test.cc index 24e1925e1..c55eb1290 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -554,7 +554,7 @@ class ColumnFamilyTest INSTANTIATE_TEST_CASE_P(FormatDef, ColumnFamilyTest, testing::Values(test::kDefaultFormatVersion)); INSTANTIATE_TEST_CASE_P(FormatLatest, ColumnFamilyTest, - testing::Values(test::kLatestFormatVersion)); + testing::Values(kLatestFormatVersion)); TEST_P(ColumnFamilyTest, DontReuseColumnFamilyID) { for (int iter = 0; iter < 3; ++iter) { @@ -746,8 +746,8 @@ INSTANTIATE_TEST_CASE_P( std::make_tuple(test::kDefaultFormatVersion, false))); INSTANTIATE_TEST_CASE_P( FormatLatest, FlushEmptyCFTestWithParam, - testing::Values(std::make_tuple(test::kLatestFormatVersion, true), - std::make_tuple(test::kLatestFormatVersion, false))); + testing::Values(std::make_tuple(kLatestFormatVersion, true), + std::make_tuple(kLatestFormatVersion, false))); TEST_P(ColumnFamilyTest, AddDrop) { Open(); diff --git a/db/comparator_db_test.cc b/db/comparator_db_test.cc index 876cf07fa..672dffde2 100644 --- a/db/comparator_db_test.cc +++ b/db/comparator_db_test.cc @@ -317,7 +317,7 @@ class ComparatorDBTest INSTANTIATE_TEST_CASE_P(FormatDef, ComparatorDBTest, testing::Values(test::kDefaultFormatVersion)); INSTANTIATE_TEST_CASE_P(FormatLatest, ComparatorDBTest, - testing::Values(test::kLatestFormatVersion)); + testing::Values(kLatestFormatVersion)); TEST_P(ComparatorDBTest, Bytewise) { for (int rand_seed = 301; rand_seed < 306; rand_seed++) { diff --git a/db/corruption_test.cc b/db/corruption_test.cc index 09825b1be..e4a833d29 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -546,7 +546,7 @@ TEST_F(CorruptionTest, RangeDeletionCorrupted) { BlockHandle range_del_handle; ASSERT_OK(FindMetaBlockInFile( file_reader.get(), file_size, kBlockBasedTableMagicNumber, - ImmutableOptions(options_), kRangeDelBlock, &range_del_handle)); + ImmutableOptions(options_), kRangeDelBlockName, 
&range_del_handle)); ASSERT_OK(TryReopen()); ASSERT_OK(test::CorruptFile(env_, filename, diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 622f907a8..b6075b6fb 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -15,6 +15,7 @@ #include "rocksdb/flush_block_policy.h" #include "rocksdb/merge_operator.h" #include "rocksdb/perf_context.h" +#include "rocksdb/table.h" #include "rocksdb/utilities/debug.h" #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" @@ -972,8 +973,15 @@ TEST_F(DBBasicTest, MultiGetEmpty) { } while (ChangeCompactOptions()); } -TEST_F(DBBasicTest, ChecksumTest) { +class DBBlockChecksumTest : public DBBasicTest, + public testing::WithParamInterface {}; + +INSTANTIATE_TEST_CASE_P(FormatVersions, DBBlockChecksumTest, + testing::ValuesIn(test::kFooterFormatVersionsToTest)); + +TEST_P(DBBlockChecksumTest, BlockChecksumTest) { BlockBasedTableOptions table_options; + table_options.format_version = GetParam(); Options options = CurrentOptions(); const int kNumPerFile = 2; diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index c34e59260..cb571ebe8 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -15,6 +15,7 @@ #include "db/column_family.h" #include "db/db_test_util.h" #include "port/stack_trace.h" +#include "rocksdb/persistent_cache.h" #include "rocksdb/statistics.h" #include "rocksdb/table.h" #include "util/compression.h" diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index 5bc746a8e..92f4db438 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -551,10 +551,9 @@ INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P( FormatLatest, DBBloomFilterTestWithParam, ::testing::Values( - std::make_tuple(BFP::kDeprecatedBlock, false, - test::kLatestFormatVersion), - std::make_tuple(BFP::kAutoBloom, true, test::kLatestFormatVersion), - std::make_tuple(BFP::kAutoBloom, false, test::kLatestFormatVersion))); + 
std::make_tuple(BFP::kDeprecatedBlock, false, kLatestFormatVersion), + std::make_tuple(BFP::kAutoBloom, true, kLatestFormatVersion), + std::make_tuple(BFP::kAutoBloom, false, kLatestFormatVersion))); #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) TEST_F(DBBloomFilterTest, BloomFilterRate) { diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 61daaa446..4168217ee 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -15,6 +15,7 @@ #include "rocksdb/env_encryption.h" #include "rocksdb/unique_id.h" #include "rocksdb/utilities/object_registry.h" +#include "table/format.h" #include "util/random.h" namespace ROCKSDB_NAMESPACE { @@ -516,6 +517,11 @@ Options DBTestBase::GetOptions( table_options.index_block_restart_interval = 8; break; } + case kBlockBasedTableWithLatestFormat: { + // In case different from default + table_options.format_version = kLatestFormatVersion; + break; + } case kOptimizeFiltersForHits: { options.optimize_filters_for_hits = true; set_block_based_table_factory = true; diff --git a/db/db_test_util.h b/db/db_test_util.h index a552ea355..ea2765170 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -867,6 +867,7 @@ class DBTestBase : public testing::Test { kBlockBasedTableWithIndexRestartInterval, kBlockBasedTableWithPartitionedIndex, kBlockBasedTableWithPartitionedIndexFormat4, + kBlockBasedTableWithLatestFormat, kPartitionedFilterWithNewTableReaderForCompactions, kUniversalSubcompactions, kUnorderedWrite, diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc index 38beb32d2..894bc5d2a 100644 --- a/db/external_sst_file_test.cc +++ b/db/external_sst_file_test.cc @@ -41,16 +41,33 @@ class ExternalSSTTestEnv : public EnvWrapper { bool fail_link_; }; +class ExternalSSTFileTestBase : public DBTestBase { + public: + ExternalSSTFileTestBase() + : DBTestBase("external_sst_file_test", /*env_do_fsync=*/true) { + sst_files_dir_ = dbname_ + "/sst_files/"; + 
DestroyAndRecreateExternalSSTFilesDir(); + } + + void DestroyAndRecreateExternalSSTFilesDir() { + ASSERT_OK(DestroyDir(env_, sst_files_dir_)); + ASSERT_OK(env_->CreateDir(sst_files_dir_)); + } + + ~ExternalSSTFileTestBase() override { + DestroyDir(env_, sst_files_dir_).PermitUncheckedError(); + } + + protected: + std::string sst_files_dir_; +}; + class ExternSSTFileLinkFailFallbackTest - : public DBTestBase, + : public ExternalSSTFileTestBase, public ::testing::WithParamInterface> { public: ExternSSTFileLinkFailFallbackTest() - : DBTestBase("external_sst_file_test", /*env_do_fsync=*/true), - test_env_(new ExternalSSTTestEnv(env_, true)) { - sst_files_dir_ = dbname_ + "/sst_files/"; - EXPECT_EQ(DestroyDir(env_, sst_files_dir_), Status::OK()); - EXPECT_EQ(env_->CreateDir(sst_files_dir_), Status::OK()); + : test_env_(new ExternalSSTTestEnv(env_, true)) { options_ = CurrentOptions(); options_.disable_auto_compactions = true; options_.env = test_env_; @@ -65,25 +82,15 @@ class ExternSSTFileLinkFailFallbackTest } protected: - std::string sst_files_dir_; Options options_; ExternalSSTTestEnv* test_env_; }; class ExternalSSTFileTest - : public DBTestBase, + : public ExternalSSTFileTestBase, public ::testing::WithParamInterface> { public: - ExternalSSTFileTest() - : DBTestBase("external_sst_file_test", /*env_do_fsync=*/true) { - sst_files_dir_ = dbname_ + "/sst_files/"; - DestroyAndRecreateExternalSSTFilesDir(); - } - - void DestroyAndRecreateExternalSSTFilesDir() { - ASSERT_OK(DestroyDir(env_, sst_files_dir_)); - ASSERT_OK(env_->CreateDir(sst_files_dir_)); - } + ExternalSSTFileTest() {} Status GenerateOneExternalFile( const Options& options, ColumnFamilyHandle* cfh, @@ -282,13 +289,8 @@ class ExternalSSTFileTest return db_->IngestExternalFile(files, opts); } - ~ExternalSSTFileTest() override { - DestroyDir(env_, sst_files_dir_).PermitUncheckedError(); - } - protected: int last_file_id_ = 0; - std::string sst_files_dir_; }; TEST_F(ExternalSSTFileTest, Basic) { @@ -2382,10 
+2384,18 @@ TEST_F(ExternalSSTFileTest, IngestFileWrittenWithCompressionDictionary) { ASSERT_EQ(1, num_compression_dicts); } +class ExternalSSTBlockChecksumTest + : public ExternalSSTFileTestBase, + public testing::WithParamInterface {}; + +INSTANTIATE_TEST_CASE_P(FormatVersions, ExternalSSTBlockChecksumTest, + testing::ValuesIn(test::kFooterFormatVersionsToTest)); + // Very slow, not worth the cost to run regularly -TEST_F(ExternalSSTFileTest, DISABLED_HugeBlockChecksum) { +TEST_P(ExternalSSTBlockChecksumTest, DISABLED_HugeBlockChecksum) { + BlockBasedTableOptions table_options; + table_options.format_version = GetParam(); for (auto t : GetSupportedChecksums()) { - BlockBasedTableOptions table_options; table_options.checksum = t; Options options = CurrentOptions(); options.table_factory.reset(NewBlockBasedTableFactory(table_options)); diff --git a/db/version_set.cc b/db/version_set.cc index c0947c071..b240711e6 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1271,8 +1271,8 @@ Status Version::GetTableProperties(std::shared_ptr* tp, return s; } - // By setting the magic number to kInvalidTableMagicNumber, we can by - // pass the magic number check in the footer. + // By setting the magic number to kNullTableMagicNumber, we can bypass + // the magic number check in the footer. 
std::unique_ptr file_reader( new RandomAccessFileReader( std::move(file), file_name, nullptr /* env */, io_tracer_, @@ -1281,7 +1281,7 @@ Status Version::GetTableProperties(std::shared_ptr* tp, std::unique_ptr props; s = ReadTableProperties( file_reader.get(), file_meta->fd.GetFileSize(), - Footer::kInvalidTableMagicNumber /* table's magic number */, *ioptions, + Footer::kNullTableMagicNumber /* table's magic number */, *ioptions, &props); if (!s.ok()) { return s; diff --git a/include/rocksdb/stats_history.h b/include/rocksdb/stats_history.h index 4acaad26f..57e469295 100644 --- a/include/rocksdb/stats_history.h +++ b/include/rocksdb/stats_history.h @@ -53,6 +53,7 @@ class StatsHistoryIterator { // REQUIRES: Valid() virtual uint64_t GetStatsTime() const = 0; + // DEPRECATED (was never used) virtual int GetFormatVersion() const { return -1; } // Return the current stats history as an std::map which specifies the diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index dcc24df18..3590b7f42 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -44,6 +44,9 @@ class WritableFileWriter; struct ConfigOptions; struct EnvOptions; +// Types of checksums to use for checking integrity of logical blocks within +// files. All checksums currently use 32 bits of checking power (1 in 4B +// chance of failing to detect random corruption). enum ChecksumType : char { kNoChecksum = 0x0, kCRC32c = 0x1, @@ -390,10 +393,9 @@ struct BlockBasedTableOptions { // Default: 0 (disabled) uint32_t read_amp_bytes_per_bit = 0; - // We currently have five versions: - // 0 -- This version is currently written out by all RocksDB's versions by - // default. Can be read by really old RocksDB's. Doesn't support changing - // checksum (default is CRC32). + // We currently have these versions: + // 0 -- This version can be read by really old RocksDB's. Doesn't support + // changing checksum type (default is CRC32). // 1 -- Can be read by RocksDB's versions since 3.0. 
Supports non-default // checksum, like xxHash. It is written by RocksDB when // BlockBasedTableOptions::checksum is something other than kCRC32c. (version diff --git a/table/adaptive/adaptive_table_factory.cc b/table/adaptive/adaptive_table_factory.cc index 63333b1b3..8a65d64f0 100644 --- a/table/adaptive/adaptive_table_factory.cc +++ b/table/adaptive/adaptive_table_factory.cc @@ -58,7 +58,7 @@ Status AdaptiveTableFactory::NewTableReader( return plain_table_factory_->NewTableReader( table_reader_options, std::move(file), file_size, table); } else if (footer.table_magic_number() == kBlockBasedTableMagicNumber || - footer.table_magic_number() == kLegacyBlockBasedTableMagicNumber) { + footer.table_magic_number() == kLegacyBlockBasedTableMagicNumber) { return block_based_table_factory_->NewTableReader( ro, table_reader_options, std::move(file), file_size, table, prefetch_index_and_filter_in_cache); diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 44f3bf3d0..62f0b3bb4 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -1744,7 +1744,7 @@ void BlockBasedTableBuilder::WritePropertiesBlock( } #endif // !NDEBUG - const std::string* properties_block_meta = &kPropertiesBlock; + const std::string* properties_block_meta = &kPropertiesBlockName; TEST_SYNC_POINT_CALLBACK( "BlockBasedTableBuilder::WritePropertiesBlock:Meta", &properties_block_meta); @@ -1769,7 +1769,7 @@ void BlockBasedTableBuilder::WriteCompressionDictBlock( #endif // NDEBUG } if (ok()) { - meta_index_builder->Add(kCompressionDictBlock, + meta_index_builder->Add(kCompressionDictBlockName, compression_dict_block_handle); } } @@ -1781,7 +1781,7 @@ void BlockBasedTableBuilder::WriteRangeDelBlock( BlockHandle range_del_block_handle; WriteRawBlock(rep_->range_del_block.Finish(), kNoCompression, &range_del_block_handle, BlockType::kRangeDeletion); - meta_index_builder->Add(kRangeDelBlock, 
range_del_block_handle); + meta_index_builder->Add(kRangeDelBlockName, range_del_block_handle); } } @@ -1799,14 +1799,16 @@ void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle, // this is guaranteed by BlockBasedTableBuilder's constructor assert(r->table_options.checksum == kCRC32c || r->table_options.format_version != 0); - Footer footer( - legacy ? kLegacyBlockBasedTableMagicNumber : kBlockBasedTableMagicNumber, - r->table_options.format_version); - footer.set_metaindex_handle(metaindex_block_handle); - footer.set_index_handle(index_block_handle); - footer.set_checksum(r->table_options.checksum); + Footer footer; + footer + .set_table_magic_number(legacy ? kLegacyBlockBasedTableMagicNumber + : kBlockBasedTableMagicNumber) + .set_format_version(r->table_options.format_version) + .set_metaindex_handle(metaindex_block_handle) + .set_index_handle(index_block_handle) + .set_checksum_type(r->table_options.checksum); std::string footer_encoding; - footer.EncodeTo(&footer_encoding); + footer.EncodeTo(&footer_encoding, r->get_offset()); assert(ok()); IOStatus ios = r->file->Append(footer_encoding); if (ios.ok()) { diff --git a/table/block_based/block_based_table_factory.cc b/table/block_based/block_based_table_factory.cc index 66bb44e52..08cd06b6c 100644 --- a/table/block_based/block_based_table_factory.cc +++ b/table/block_based/block_based_table_factory.cc @@ -650,7 +650,7 @@ Status BlockBasedTableFactory::ValidateOptions( "Enable pin_l0_filter_and_index_blocks_in_cache, " ", but block cache is disabled"); } - if (!BlockBasedTableSupportedVersion(table_options_.format_version)) { + if (!IsSupportedFormatVersion(table_options_.format_version)) { return Status::InvalidArgument( "Unsupported BlockBasedTable format_version. 
Please check " "include/rocksdb/table.h for more info"); diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 1012b02a7..3ee3493b7 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -600,7 +600,7 @@ Status BlockBasedTable::Open( if (!s.ok()) { return s; } - if (!BlockBasedTableSupportedVersion(footer.version())) { + if (!IsSupportedFormatVersion(footer.format_version())) { return Status::Corruption( "Unknown Footer version. Maybe this file was created with newer " "version of RocksDB?"); @@ -757,7 +757,7 @@ Status BlockBasedTable::ReadPropertiesBlock( InternalIterator* meta_iter, const SequenceNumber largest_seqno) { Status s; BlockHandle handle; - s = FindOptionalMetaBlock(meta_iter, kPropertiesBlock, &handle); + s = FindOptionalMetaBlock(meta_iter, kPropertiesBlockName, &handle); if (!s.ok()) { ROCKS_LOG_WARN(rep_->ioptions.logger, @@ -856,7 +856,7 @@ Status BlockBasedTable::ReadRangeDelBlock( BlockCacheLookupContext* lookup_context) { Status s; BlockHandle range_del_handle; - s = FindOptionalMetaBlock(meta_iter, kRangeDelBlock, &range_del_handle); + s = FindOptionalMetaBlock(meta_iter, kRangeDelBlockName, &range_del_handle); if (!s.ok()) { ROCKS_LOG_WARN( rep_->ioptions.logger, @@ -925,7 +925,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks( rep_->index_type == BlockBasedTableOptions::kTwoLevelIndexSearch); // Find compression dictionary handle - s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlock, + s = FindOptionalMetaBlock(meta_iter, kCompressionDictBlockName, &rep_->compression_dict_handle); if (!s.ok()) { return s; @@ -1808,7 +1808,7 @@ void BlockBasedTable::RetrieveMultipleBlocks( // begin address of each read request, we need to add the offset // in each read request. Checksum is stored in the block trailer, // beyond the payload size. 
- s = VerifyBlockChecksum(footer.checksum(), data + req_offset, + s = VerifyBlockChecksum(footer.checksum_type(), data + req_offset, handle.size(), rep_->file->file_name(), handle.offset()); TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s); @@ -1875,9 +1875,9 @@ void BlockBasedTable::RetrieveMultipleBlocks( if (compression_type != kNoCompression) { UncompressionContext context(compression_type); UncompressionInfo info(context, uncompression_dict, compression_type); - s = UncompressBlockContents(info, req.result.data() + req_offset, - handle.size(), &contents, footer.version(), - rep_->ioptions, memory_allocator); + s = UncompressBlockContents( + info, req.result.data() + req_offset, handle.size(), &contents, + footer.format_version(), rep_->ioptions, memory_allocator); } else { // There are two cases here: // 1) caller uses the shared buffer (scratch or direct io buffer); @@ -3008,15 +3008,15 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName( return BlockType::kFilter; } - if (meta_block_name == kPropertiesBlock) { + if (meta_block_name == kPropertiesBlockName) { return BlockType::kProperties; } - if (meta_block_name == kCompressionDictBlock) { + if (meta_block_name == kCompressionDictBlockName) { return BlockType::kCompressionDictionary; } - if (meta_block_name == kRangeDelBlock) { + if (meta_block_name == kRangeDelBlockName) { return BlockType::kRangeDeletion; } @@ -3045,7 +3045,7 @@ Status BlockBasedTable::VerifyChecksumInMetaBlocks( s = handle.DecodeFrom(&input); BlockContents contents; const Slice meta_block_name = index_iter->key(); - if (meta_block_name == kPropertiesBlock) { + if (meta_block_name == kPropertiesBlockName) { // Unfortunate special handling for properties block checksum w/ // global seqno std::unique_ptr table_properties; @@ -3111,8 +3111,8 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options, // 5. 
index_type Status BlockBasedTable::CreateIndexReader( const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, - InternalIterator* preloaded_meta_index_iter, bool use_cache, bool prefetch, - bool pin, BlockCacheLookupContext* lookup_context, + InternalIterator* meta_iter, bool use_cache, bool prefetch, bool pin, + BlockCacheLookupContext* lookup_context, std::unique_ptr* index_reader) { // kHashSearch requires non-empty prefix_extractor but bypass checking // prefix_extractor here since we have no access to MutableCFOptions. @@ -3136,25 +3136,12 @@ Status BlockBasedTable::CreateIndexReader( case BlockBasedTableOptions::kHashSearch: { std::unique_ptr metaindex_guard; std::unique_ptr metaindex_iter_guard; - auto meta_index_iter = preloaded_meta_index_iter; bool should_fallback = false; if (rep_->internal_prefix_transform.get() == nullptr) { ROCKS_LOG_WARN(rep_->ioptions.logger, "No prefix extractor passed in. Fall back to binary" " search index."); should_fallback = true; - } else if (meta_index_iter == nullptr) { - auto s = ReadMetaIndexBlock(ro, prefetch_buffer, &metaindex_guard, - &metaindex_iter_guard); - if (!s.ok()) { - // we simply fall back to binary search in case there is any - // problem with prefix hash index loading. - ROCKS_LOG_WARN(rep_->ioptions.logger, - "Unable to read the metaindex block." 
- " Fall back to binary search index."); - should_fallback = true; - } - meta_index_iter = metaindex_iter_guard.get(); } if (should_fallback) { @@ -3162,9 +3149,9 @@ Status BlockBasedTable::CreateIndexReader( use_cache, prefetch, pin, lookup_context, index_reader); } else { - return HashIndexReader::Create(this, ro, prefetch_buffer, - meta_index_iter, use_cache, prefetch, - pin, lookup_context, index_reader); + return HashIndexReader::Create(this, ro, prefetch_buffer, meta_iter, + use_cache, prefetch, pin, lookup_context, + index_reader); } } default: { @@ -3357,17 +3344,17 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) { if (!s.ok()) { return s; } - if (metaindex_iter->key() == kPropertiesBlock) { + if (metaindex_iter->key() == kPropertiesBlockName) { out_stream << " Properties block handle: " << metaindex_iter->value().ToString(true) << "\n"; - } else if (metaindex_iter->key() == kCompressionDictBlock) { + } else if (metaindex_iter->key() == kCompressionDictBlockName) { out_stream << " Compression dictionary block handle: " << metaindex_iter->value().ToString(true) << "\n"; } else if (strstr(metaindex_iter->key().ToString().c_str(), "filter.rocksdb.") != nullptr) { out_stream << " Filter block handle: " << metaindex_iter->value().ToString(true) << "\n"; - } else if (metaindex_iter->key() == kRangeDelBlock) { + } else if (metaindex_iter->key() == kRangeDelBlockName) { out_stream << " Range deletion block handle: " << metaindex_iter->value().ToString(true) << "\n"; } diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 8efcd7e09..b8bd9f761 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -20,6 +20,7 @@ #include "table/block_based/filter_block.h" #include "table/block_based/uncompression_dict_reader.h" #include "table/format.h" +#include "table/persistent_cache_options.h" #include "table/table_properties_internal.h" #include 
"table/table_reader.h" #include "table/two_level_iterator.h" diff --git a/table/block_based/partitioned_filter_block_test.cc b/table/block_based/partitioned_filter_block_test.cc index 7def5f250..d7ae4bfd1 100644 --- a/table/block_based/partitioned_filter_block_test.cc +++ b/table/block_based/partitioned_filter_block_test.cc @@ -3,15 +3,15 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). +#include "table/block_based/partitioned_filter_block.h" + #include -#include "rocksdb/filter_policy.h" - -#include "table/block_based/block_based_table_reader.h" -#include "table/block_based/partitioned_filter_block.h" -#include "table/block_based/filter_policy_internal.h" - #include "index_builder.h" +#include "rocksdb/filter_policy.h" +#include "table/block_based/block_based_table_reader.h" +#include "table/block_based/filter_policy_internal.h" +#include "table/format.h" #include "test_util/testharness.h" #include "test_util/testutil.h" #include "util/coding.h" @@ -292,10 +292,11 @@ class PartitionedFilterBlockTest } }; -INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest, - testing::Values(test::kDefaultFormatVersion)); -INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest, - testing::Values(test::kLatestFormatVersion)); +// Format versions potentially interesting to partitioning +INSTANTIATE_TEST_CASE_P(FormatVersions, PartitionedFilterBlockTest, + testing::ValuesIn(std::set{ + 2, 3, 4, test::kDefaultFormatVersion, + kLatestFormatVersion})); TEST_P(PartitionedFilterBlockTest, EmptyBuilder) { std::unique_ptr pib(NewIndexBuilder()); diff --git a/table/block_fetcher.cc b/table/block_fetcher.cc index 5283b1aa5..54604238c 100644 --- a/table/block_fetcher.cc +++ b/table/block_fetcher.cc @@ -32,9 +32,9 @@ inline void BlockFetcher::ProcessTrailerIfPresent() { if (footer_.GetBlockTrailerSize() > 0) { assert(footer_.GetBlockTrailerSize() == BlockBasedTable::kBlockTrailerSize); if 
(read_options_.verify_checksums) { - io_status_ = status_to_io_status( - VerifyBlockChecksum(footer_.checksum(), slice_.data(), block_size_, - file_->file_name(), handle_.offset())); + io_status_ = status_to_io_status(VerifyBlockChecksum( + footer_.checksum_type(), slice_.data(), block_size_, + file_->file_name(), handle_.offset())); } compression_type_ = BlockBasedTable::GetBlockCompressionType(slice_.data(), block_size_); @@ -315,7 +315,7 @@ IOStatus BlockFetcher::ReadBlockContents() { UncompressionContext context(compression_type_); UncompressionInfo info(context, uncompression_dict_, compression_type_); io_status_ = status_to_io_status(UncompressBlockContents( - info, slice_.data(), block_size_, contents_, footer_.version(), + info, slice_.data(), block_size_, contents_, footer_.format_version(), ioptions_, memory_allocator_)); #ifndef NDEBUG num_heap_buf_memcpy_++; diff --git a/table/block_fetcher.h b/table/block_fetcher.h index 67ca35840..355cb53d0 100644 --- a/table/block_fetcher.h +++ b/table/block_fetcher.h @@ -12,6 +12,7 @@ #include "table/block_based/block.h" #include "table/block_based/block_type.h" #include "table/format.h" +#include "table/persistent_cache_options.h" namespace ROCKSDB_NAMESPACE { diff --git a/table/cuckoo/cuckoo_table_builder.cc b/table/cuckoo/cuckoo_table_builder.cc index 2a4e2536a..0707ddf10 100644 --- a/table/cuckoo/cuckoo_table_builder.cc +++ b/table/cuckoo/cuckoo_table_builder.cc @@ -381,7 +381,7 @@ Status CuckooTableBuilder::Finish() { return status_; } - meta_index_builder.Add(kPropertiesBlock, property_block_handle); + meta_index_builder.Add(kPropertiesBlockName, property_block_handle); Slice meta_index_block = meta_index_builder.Finish(); BlockHandle meta_index_block_handle; @@ -393,11 +393,14 @@ Status CuckooTableBuilder::Finish() { return status_; } - Footer footer(kCuckooTableMagicNumber, 1); - footer.set_metaindex_handle(meta_index_block_handle); - footer.set_index_handle(BlockHandle::NullBlockHandle()); + Footer footer; 
+ footer.set_table_magic_number(kCuckooTableMagicNumber) + .set_format_version(1) + .set_metaindex_handle(meta_index_block_handle) + .set_index_handle(BlockHandle::NullBlockHandle()) + .set_checksum_type(kNoChecksum); std::string footer_encoding; - footer.EncodeTo(&footer_encoding); + footer.EncodeTo(&footer_encoding, offset); io_status_ = file_->Append(footer_encoding); status_ = io_status_; return status_; diff --git a/table/format.cc b/table/format.cc index 10dbd3f14..3cf91b698 100644 --- a/table/format.cc +++ b/table/format.cc @@ -20,9 +20,11 @@ #include "options/options_helper.h" #include "rocksdb/env.h" #include "rocksdb/options.h" +#include "rocksdb/table.h" #include "table/block_based/block.h" #include "table/block_based/block_based_table_reader.h" #include "table/persistent_cache_helper.h" +#include "util/cast_util.h" #include "util/coding.h" #include "util/compression.h" #include "util/crc32c.h" @@ -58,6 +60,15 @@ void BlockHandle::EncodeTo(std::string* dst) const { PutVarint64Varint64(dst, offset_, size_); } +char* BlockHandle::EncodeTo(char* dst) const { + // Sanity check that all fields have been set + assert(offset_ != ~uint64_t{0}); + assert(size_ != ~uint64_t{0}); + char* cur = EncodeVarint64(dst, offset_); + cur = EncodeVarint64(cur, size_); + return cur; +} + Status BlockHandle::DecodeFrom(Slice* input) { if (GetVarint64(input, &offset_) && GetVarint64(input, &size_)) { return Status::OK(); @@ -166,8 +177,8 @@ inline uint64_t UpconvertLegacyFooterFormat(uint64_t magic_number) { } } // namespace -void Footer::set_table_magic_number(uint64_t magic_number) { - assert(!HasInitializedTableMagicNumber()); +Footer& Footer::set_table_magic_number(uint64_t magic_number) { + assert(table_magic_number_ == kNullTableMagicNumber); table_magic_number_ = magic_number; if (magic_number == kBlockBasedTableMagicNumber || magic_number == kLegacyBlockBasedTableMagicNumber) { @@ -176,64 +187,80 @@ void Footer::set_table_magic_number(uint64_t magic_number) { } else { 
block_trailer_size_ = 0; } + return *this; } -// legacy footer format: -// metaindex handle (varint64 offset, varint64 size) -// index handle (varint64 offset, varint64 size) -// to make the total size 2 * BlockHandle::kMaxEncodedLength -// table_magic_number (8 bytes) -// new footer format: -// checksum type (char, 1 byte) -// metaindex handle (varint64 offset, varint64 size) -// index handle (varint64 offset, varint64 size) -// to make the total size 2 * BlockHandle::kMaxEncodedLength + 1 -// footer version (4 bytes) -// table_magic_number (8 bytes) -void Footer::EncodeTo(std::string* dst) const { - assert(HasInitializedTableMagicNumber()); - if (IsLegacyFooterFormat(table_magic_number())) { - // has to be default checksum with legacy footer - assert(checksum_ == kCRC32c); - const size_t original_size = dst->size(); - metaindex_handle_.EncodeTo(dst); - index_handle_.EncodeTo(dst); - dst->resize(original_size + 2 * BlockHandle::kMaxEncodedLength); // Padding - PutFixed32(dst, static_cast(table_magic_number() & 0xffffffffu)); - PutFixed32(dst, static_cast(table_magic_number() >> 32)); - assert(dst->size() == original_size + kVersion0EncodedLength); +// Footer format, in three parts: +// * Part1 +// -> format_version == 0 (inferred from legacy magic number) +// (0 bytes) +// -> format_version >= 1 +// checksum type (char, 1 byte) +// * Part2 +// metaindex handle (varint64 offset, varint64 size) +// index handle (varint64 offset, varint64 size) +// for part2 size = 2 * BlockHandle::kMaxEncodedLength = 40 +// * Part3 +// -> format_version == 0 (inferred from legacy magic number) +// legacy magic number (8 bytes) +// -> format_version >= 1 (inferred from NOT legacy magic number) +// format_version (uint32LE, 4 bytes), also called "footer version" +// newer magic number (8 bytes) +void Footer::EncodeTo(std::string* dst, uint64_t footer_offset) const { + (void)footer_offset; // Future use + + // Sanitize magic numbers & format versions + assert(table_magic_number_ != 
kNullTableMagicNumber); + uint64_t magic = table_magic_number_; + uint32_t fv = format_version_; + assert(fv != kInvalidFormatVersion); + assert(IsLegacyFooterFormat(magic) == (fv == 0)); + + ChecksumType ct = checksum_type(); + + // Allocate destination data and generate parts 1 and 3 + const size_t original_size = dst->size(); + char* part2; + if (fv > 0) { + dst->resize(original_size + kNewVersionsEncodedLength); + char* part1 = &(*dst)[original_size]; + part2 = part1 + 1; + char* part3 = part2 + 2 * BlockHandle::kMaxEncodedLength; + assert(&(*dst)[dst->size() - 1] + 1 - part3 == /* part 3 size */ 12); + // Generate parts 1 and 3 + part1[0] = ct; + EncodeFixed32(part3, fv); + EncodeFixed64(part3 + 4, magic); } else { - const size_t original_size = dst->size(); - dst->push_back(static_cast(checksum_)); - metaindex_handle_.EncodeTo(dst); - index_handle_.EncodeTo(dst); - dst->resize(original_size + kNewVersionsEncodedLength - 12); // Padding - PutFixed32(dst, version()); - PutFixed32(dst, static_cast(table_magic_number() & 0xffffffffu)); - PutFixed32(dst, static_cast(table_magic_number() >> 32)); - assert(dst->size() == original_size + kNewVersionsEncodedLength); + dst->resize(original_size + kVersion0EncodedLength); + part2 = &(*dst)[original_size]; + char* part3 = part2 + 2 * BlockHandle::kMaxEncodedLength; + assert(&(*dst)[dst->size() - 1] + 1 - part3 == /* part 3 size */ 8); + // Legacy SST files use kCRC32c checksum but it's not stored in footer. 
+ assert(ct == kNoChecksum || ct == kCRC32c); + // Generate part 3 (part 1 empty) + EncodeFixed64(part3, magic); } + + // Generate Part2 + // Variable size encode handles (sigh) + part2 = metaindex_handle_.EncodeTo(part2); + /*part2 = */ index_handle_.EncodeTo(part2); + + // remainder of part2 is already zero padded } -Footer::Footer(uint64_t _table_magic_number, uint32_t _version) - : version_(_version), - checksum_(kCRC32c), - table_magic_number_(_table_magic_number) { - // This should be guaranteed by constructor callers - assert(!IsLegacyFooterFormat(_table_magic_number) || version_ == 0); -} +Status Footer::DecodeFrom(Slice* input, uint64_t input_offset) { + (void)input_offset; // Future use -Status Footer::DecodeFrom(Slice* input) { - assert(!HasInitializedTableMagicNumber()); + // Only decode to unused Footer + assert(table_magic_number_ == kNullTableMagicNumber); assert(input != nullptr); assert(input->size() >= kMinEncodedLength); const char* magic_ptr = input->data() + input->size() - kMagicNumberLengthByte; - const uint32_t magic_lo = DecodeFixed32(magic_ptr); - const uint32_t magic_hi = DecodeFixed32(magic_ptr + 4); - uint64_t magic = ((static_cast(magic_hi) << 32) | - (static_cast(magic_lo))); + uint64_t magic = DecodeFixed64(magic_ptr); // We check for legacy formats here and silently upconvert them bool legacy = IsLegacyFooterFormat(magic); @@ -242,44 +269,51 @@ Status Footer::DecodeFrom(Slice* input) { } set_table_magic_number(magic); + // Parse Part3 if (legacy) { // The size is already asserted to be at least kMinEncodedLength // at the beginning of the function input->remove_prefix(input->size() - kVersion0EncodedLength); - version_ = 0 /* legacy */; - checksum_ = kCRC32c; + format_version_ = 0 /* legacy */; + checksum_type_ = kCRC32c; } else { - version_ = DecodeFixed32(magic_ptr - 4); - // Footer version 1 and higher will always occupy exactly this many bytes. 
- // It consists of the checksum type, two block handles, padding, - // a version number, and a magic number + const char* part3_ptr = magic_ptr - 4; + format_version_ = DecodeFixed32(part3_ptr); + if (!IsSupportedFormatVersion(format_version_)) { + return Status::Corruption("Corrupt or unsupported format_version: " + + ROCKSDB_NAMESPACE::ToString(format_version_)); + } + // All known format versions >= 1 occupy exactly this many bytes. if (input->size() < kNewVersionsEncodedLength) { - return Status::Corruption("input is too short to be an sstable"); - } else { - input->remove_prefix(input->size() - kNewVersionsEncodedLength); + return Status::Corruption("Input is too short to be an SST file"); } - uint32_t chksum; - if (!GetVarint32(input, &chksum)) { - return Status::Corruption("bad checksum type"); - } - checksum_ = static_cast(chksum); - if (chksum != static_cast(checksum_) || - !IsSupportedChecksumType(checksum_)) { - return Status::Corruption("unknown checksum type " + - ROCKSDB_NAMESPACE::ToString(chksum)); + uint64_t adjustment = input->size() - kNewVersionsEncodedLength; + input->remove_prefix(adjustment); + + // Parse Part1 + char chksum = input->data()[0]; + checksum_type_ = lossless_cast(chksum); + if (!IsSupportedChecksumType(checksum_type())) { + return Status::Corruption( + "Corrupt or unsupported checksum type: " + + ROCKSDB_NAMESPACE::ToString(lossless_cast(chksum))); } + // Consume checksum type field + input->remove_prefix(1); } + // Parse Part2 Status result = metaindex_handle_.DecodeFrom(input); if (result.ok()) { result = index_handle_.DecodeFrom(input); } - if (result.ok()) { - // We skip over any leftover data (just padding for now) in "input" - const char* end = magic_ptr + kMagicNumberLengthByte; - *input = Slice(end, input->data() + input->size() - end); + if (!result.ok()) { + return result; } - return result; + + // Mark all input consumed (skip padding & part3) + *input = Slice(input->data() + input->size(), 0U); + return 
Status::OK(); } std::string Footer::ToString() const { @@ -293,14 +327,12 @@ std::string Footer::ToString() const { result.append("table_magic_number: " + ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n "); } else { - result.append("checksum: " + ROCKSDB_NAMESPACE::ToString(checksum_) + - "\n "); result.append("metaindex handle: " + metaindex_handle_.ToString() + "\n "); result.append("index handle: " + index_handle_.ToString() + "\n "); - result.append("footer version: " + ROCKSDB_NAMESPACE::ToString(version_) + - "\n "); result.append("table_magic_number: " + ROCKSDB_NAMESPACE::ToString(table_magic_number_) + "\n "); + result.append("format version: " + + ROCKSDB_NAMESPACE::ToString(format_version_) + "\n "); } return result; } @@ -319,10 +351,9 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file, std::string footer_buf; AlignedBuf internal_buf; Slice footer_input; - size_t read_offset = - (file_size > Footer::kMaxEncodedLength) - ? static_cast(file_size - Footer::kMaxEncodedLength) - : 0; + uint64_t read_offset = (file_size > Footer::kMaxEncodedLength) + ? file_size - Footer::kMaxEncodedLength + : 0; Status s; // TODO: Need to pass appropriate deadline to TryReadFromCache(). Right now, // there is no readahead for point lookups, so TryReadFromCache will fail if @@ -353,7 +384,7 @@ Status ReadFooterFromFile(const IOOptions& opts, RandomAccessFileReader* file, file->file_name()); } - s = footer->DecodeFrom(&footer_input); + s = footer->DecodeFrom(&footer_input, read_offset); if (!s.ok()) { return s; } @@ -376,7 +407,7 @@ inline uint32_t ModifyChecksumForLastByte(uint32_t checksum, char last_byte) { // more byte, except we don't need to re-mix the input checksum as long as // we do this step only once (per checksum). 
const uint32_t kRandomPrime = 0x6b9083d9; - return checksum ^ static_cast(last_byte) * kRandomPrime; + return checksum ^ lossless_cast(last_byte) * kRandomPrime; } } // namespace diff --git a/table/format.h b/table/format.h index c64c1ebea..b32ab324e 100644 --- a/table/format.h +++ b/table/format.h @@ -8,21 +8,20 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once -#include + +#include #include + #include "file/file_prefetch_buffer.h" #include "file/random_access_file_reader.h" - -#include "rocksdb/options.h" -#include "rocksdb/slice.h" -#include "rocksdb/status.h" -#include "rocksdb/table.h" - #include "memory/memory_allocator.h" #include "options/cf_options.h" #include "port/malloc.h" #include "port/port.h" // noexcept -#include "table/persistent_cache_options.h" +#include "rocksdb/slice.h" +#include "rocksdb/status.h" +#include "rocksdb/table.h" +#include "util/hash.h" namespace ROCKSDB_NAMESPACE { @@ -32,7 +31,7 @@ struct ReadOptions; extern bool ShouldReportDetailedTime(Env* env, Statistics* stats); // the length of the magic number in bytes. -const int kMagicNumberLengthByte = 8; +constexpr uint32_t kMagicNumberLengthByte = 8; // BlockHandle is a pointer to the extent of a file that stores a data // block or a meta block. 
@@ -52,6 +51,7 @@ class BlockHandle { void set_size(uint64_t _size) { size_ = _size; } void EncodeTo(std::string* dst) const; + char* EncodeTo(char* dst) const; Status DecodeFrom(Slice* input); Status DecodeSizeFrom(uint64_t offset, Slice* input); @@ -65,7 +65,7 @@ class BlockHandle { static const BlockHandle& NullBlockHandle() { return kNullBlockHandle; } // Maximum encoding length of a BlockHandle - enum { kMaxEncodedLength = 10 + 10 }; + static constexpr uint32_t kMaxEncodedLength = 2 * kMaxVarint64Length; inline bool operator==(const BlockHandle& rhs) const { return offset_ == rhs.offset_ && size_ == rhs.size_; @@ -117,94 +117,107 @@ inline uint32_t GetCompressFormatForVersion(uint32_t format_version) { return format_version >= 2 ? 2 : 1; } -inline bool BlockBasedTableSupportedVersion(uint32_t version) { - return version <= 5; +constexpr uint32_t kLatestFormatVersion = 5; + +inline bool IsSupportedFormatVersion(uint32_t version) { + return version <= kLatestFormatVersion; } -// Footer encapsulates the fixed information stored at the tail -// end of every table file. +// Footer encapsulates the fixed information stored at the tail end of every +// SST file. In general, it should only include things that cannot go +// elsewhere under the metaindex block. For example, checksum_type is +// required for verifying metaindex block checksum (when applicable), but +// index block handle can easily go in metaindex block (possible future). class Footer { public: - // Constructs a footer without specifying its table magic number. - // In such case, the table magic number of such footer should be - // initialized via @ReadFooterFromFile(). - // Use this when you plan to load Footer with DecodeFrom(). Never use this - // when you plan to EncodeTo. - Footer() : Footer(kInvalidTableMagicNumber, 0) {} + Footer() {} - // Use this constructor when you plan to write out the footer using - // EncodeTo(). Never use this constructor with DecodeFrom(). 
- // `version` is same as `format_version` for block-based table. - Footer(uint64_t table_magic_number, uint32_t version); - - // The version of the footer in this file - uint32_t version() const { return version_; } - - // The checksum type used in this file - ChecksumType checksum() const { return checksum_; } - void set_checksum(const ChecksumType c) { checksum_ = c; } - - // The block handle for the metaindex block of the table - const BlockHandle& metaindex_handle() const { return metaindex_handle_; } - void set_metaindex_handle(const BlockHandle& h) { metaindex_handle_ = h; } - - // The block handle for the index block of the table - const BlockHandle& index_handle() const { return index_handle_; } - - void set_index_handle(const BlockHandle& h) { index_handle_ = h; } + // Uses builder pattern rather than distinctive ctors + // Table magic number identifies file as RocksDB SST file and which kind of + // SST format is in use. + Footer& set_table_magic_number(uint64_t tmn); uint64_t table_magic_number() const { return table_magic_number_; } - void EncodeTo(std::string* dst) const; + // A version (footer and more) within a kind of SST. (It would add more + // unnecessary complexity to separate footer versions and + // BBTO::format_version.) + Footer& set_format_version(uint32_t fv) { + format_version_ = fv; + return *this; + } + uint32_t format_version() const { return format_version_; } - // Set the current footer based on the input slice. - // - // REQUIRES: table_magic_number_ is not set (i.e., - // HasInitializedTableMagicNumber() is true). The function will initialize the - // magic number - Status DecodeFrom(Slice* input); + // Block handle for metaindex block. + Footer& set_metaindex_handle(const BlockHandle& h) { + metaindex_handle_ = h; + return *this; + } + const BlockHandle& metaindex_handle() const { return metaindex_handle_; } - // Encoded length of a Footer. 
Note that the serialization of a Footer will - // always occupy at least kMinEncodedLength bytes. If fields are changed - // the version number should be incremented and kMaxEncodedLength should be - // increased accordingly. - enum { - // Footer version 0 (legacy) will always occupy exactly this many bytes. - // It consists of two block handles, padding, and a magic number. - kVersion0EncodedLength = 2 * BlockHandle::kMaxEncodedLength + 8, - // Footer of versions 1 and higher will always occupy exactly this many - // bytes. It consists of the checksum type, two block handles, padding, - // a version number (bigger than 1), and a magic number - kNewVersionsEncodedLength = 1 + 2 * BlockHandle::kMaxEncodedLength + 4 + 8, - kMinEncodedLength = kVersion0EncodedLength, - kMaxEncodedLength = kNewVersionsEncodedLength, - }; + // Block handle for (top-level) index block. + Footer& set_index_handle(const BlockHandle& h) { + index_handle_ = h; + return *this; + } + const BlockHandle& index_handle() const { return index_handle_; } - static const uint64_t kInvalidTableMagicNumber = 0; + // Checksum type used in the file. + Footer& set_checksum_type(ChecksumType ct) { + checksum_type_ = ct; + return *this; + } + ChecksumType checksum_type() const { + return static_cast(checksum_type_); + } - // convert this object to a human readable form + // Appends serialized footer to `dst`. The starting offset of the footer + // within the file is required for future work. + void EncodeTo(std::string* dst, uint64_t footer_offset) const; + + // Deserialize a footer (populate fields) from `input` and check for various + // corruptions. On success (and some error cases) `input` is advanced past + // the footer. Like EncodeTo, the offset within the file will be needed for + // future work + Status DecodeFrom(Slice* input, uint64_t input_offset); + + // Convert this object to a human readable form std::string ToString() const; // Block trailer size used by file with this footer (e.g. 
5 for block-based // table and 0 for plain table) inline size_t GetBlockTrailerSize() const { return block_trailer_size_; } + // Encoded lengths of Footers. Bytes for serialized Footer will always be + // >= kMinEncodedLength and <= kMaxEncodedLength. + // + // Footer version 0 (legacy) will always occupy exactly this many bytes. + // It consists of two block handles, padding, and a magic number. + static constexpr uint32_t kVersion0EncodedLength = + 2 * BlockHandle::kMaxEncodedLength + kMagicNumberLengthByte; + static constexpr uint32_t kMinEncodedLength = kVersion0EncodedLength; + + // Footer of versions 1 and higher will always occupy exactly this many + // bytes. It originally consisted of the checksum type, two block handles, + // padding (to maximum handle encoding size), a format version number, and a + // magic number. + static constexpr uint32_t kNewVersionsEncodedLength = + 1 + 2 * BlockHandle::kMaxEncodedLength + 4 + kMagicNumberLengthByte; + static constexpr uint32_t kMaxEncodedLength = kNewVersionsEncodedLength; + + static constexpr uint64_t kNullTableMagicNumber = 0; + private: - // REQUIRES: magic number wasn't initialized. - void set_table_magic_number(uint64_t magic_number); + static constexpr uint32_t kInvalidFormatVersion = 0xffffffffU; + static constexpr int kInvalidChecksumType = + (1 << (sizeof(ChecksumType) * 8)) | kNoChecksum; - // return true if @table_magic_number_ is set to a value different - // from @kInvalidTableMagicNumber. 
- bool HasInitializedTableMagicNumber() const { - return (table_magic_number_ != kInvalidTableMagicNumber); - } - - uint32_t version_; - ChecksumType checksum_; - uint8_t block_trailer_size_ = 0; // set based on magic number + uint64_t table_magic_number_ = kNullTableMagicNumber; + uint32_t format_version_ = kInvalidFormatVersion; BlockHandle metaindex_handle_; BlockHandle index_handle_; - uint64_t table_magic_number_ = 0; + int checksum_type_ = kInvalidChecksumType; + uint8_t block_trailer_size_ = 0; // set based on magic number }; // Read the footer from file diff --git a/table/meta_blocks.cc b/table/meta_blocks.cc index 7ce118a5a..80e757295 100644 --- a/table/meta_blocks.cc +++ b/table/meta_blocks.cc @@ -26,11 +26,11 @@ namespace ROCKSDB_NAMESPACE { -const std::string kPropertiesBlock = "rocksdb.properties"; +const std::string kPropertiesBlockName = "rocksdb.properties"; // Old property block name for backward compatibility const std::string kPropertiesBlockOldName = "rocksdb.stats"; -const std::string kCompressionDictBlock = "rocksdb.compression_dict"; -const std::string kRangeDelBlock = "rocksdb.range_del"; +const std::string kCompressionDictBlockName = "rocksdb.compression_dict"; +const std::string kRangeDelBlockName = "rocksdb.range_del"; MetaIndexBuilder::MetaIndexBuilder() : meta_index_block_(new BlockBuilder(1 /* restart interval */)) {} @@ -381,7 +381,7 @@ Status ReadTablePropertiesHelper( // Modified version of BlockFetcher checksum verification // (See write_global_seqno comment above) if (s.ok() && footer.GetBlockTrailerSize() > 0) { - s = VerifyBlockChecksum(footer.checksum(), properties_block.data(), + s = VerifyBlockChecksum(footer.checksum_type(), properties_block.data(), block_size, file->file_name(), handle.offset()); if (s.IsCorruption()) { if (new_table_properties->external_sst_file_global_seqno_offset != 0) { @@ -391,8 +391,8 @@ Status ReadTablePropertiesHelper( new_table_properties->external_sst_file_global_seqno_offset - handle.offset(); 
EncodeFixed64(&tmp_buf[static_cast(global_seqno_offset)], 0); - s = VerifyBlockChecksum(footer.checksum(), tmp_buf.data(), block_size, - file->file_name(), handle.offset()); + s = VerifyBlockChecksum(footer.checksum_type(), tmp_buf.data(), + block_size, file->file_name(), handle.offset()); } } } @@ -413,7 +413,7 @@ Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size, BlockHandle block_handle; Footer footer; Status s = FindMetaBlockInFile(file, file_size, table_magic_number, ioptions, - kPropertiesBlock, &block_handle, + kPropertiesBlockName, &block_handle, memory_allocator, prefetch_buffer, &footer); if (!s.ok()) { return s; @@ -438,7 +438,7 @@ Status FindOptionalMetaBlock(InternalIterator* meta_index_iter, if (meta_index_iter->Valid() && meta_index_iter->key() == meta_block_name) { Slice v = meta_index_iter->value(); return block_handle->DecodeFrom(&v); - } else if (meta_block_name == kPropertiesBlock) { + } else if (meta_block_name == kPropertiesBlockName) { // Have to try old name for compatibility meta_index_iter->Seek(kPropertiesBlockOldName); if (meta_index_iter->status().ok() && meta_index_iter->Valid() && diff --git a/table/meta_blocks.h b/table/meta_blocks.h index 3d24d5c31..88e688390 100644 --- a/table/meta_blocks.h +++ b/table/meta_blocks.h @@ -31,10 +31,10 @@ class RandomAccessFile; struct TableProperties; // Meta block names for metaindex -extern const std::string kPropertiesBlock; +extern const std::string kPropertiesBlockName; extern const std::string kPropertiesBlockOldName; -extern const std::string kCompressionDictBlock; -extern const std::string kRangeDelBlock; +extern const std::string kCompressionDictBlockName; +extern const std::string kRangeDelBlockName; class MetaIndexBuilder { public: diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc index 93edbbc23..9f09f8349 100644 --- a/table/plain/plain_table_builder.cc +++ b/table/plain/plain_table_builder.cc @@ -279,7 +279,7 @@ Status 
PlainTableBuilder::Finish() { if (!s.ok()) { return std::move(s); } - meta_index_builer.Add(kPropertiesBlock, property_block_handle); + meta_index_builer.Add(kPropertiesBlockName, property_block_handle); // -- write metaindex block BlockHandle metaindex_block_handle; @@ -292,11 +292,13 @@ Status PlainTableBuilder::Finish() { // Write Footer // no need to write out new footer if we're using default checksum - Footer footer(kLegacyPlainTableMagicNumber, 0); - footer.set_metaindex_handle(metaindex_block_handle); - footer.set_index_handle(BlockHandle::NullBlockHandle()); + Footer footer; + footer.set_table_magic_number(kLegacyPlainTableMagicNumber) + .set_format_version(0) + .set_metaindex_handle(metaindex_block_handle) + .set_index_handle(BlockHandle::NullBlockHandle()); std::string footer_encoding; - footer.EncodeTo(&footer_encoding); + footer.EncodeTo(&footer_encoding, offset_); io_status_ = file_->Append(footer_encoding); if (io_status_.ok()) { offset_ += footer_encoding.size(); diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index 8a2be95a6..8d36717df 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -74,7 +74,7 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) { // Warning about 'magic_number' being uninitialized shows up only in UBsan // builds. Though access is guarded by 's.ok()' checks, fix the issue to // avoid any warnings. 
- uint64_t magic_number = Footer::kInvalidTableMagicNumber; + uint64_t magic_number = Footer::kNullTableMagicNumber; // read table magic number Footer footer; diff --git a/table/table_test.cc b/table/table_test.cc index 5054c99bd..aa0b3b90f 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -21,16 +21,15 @@ #include #include -#include "block_fetcher.h" #include "cache/lru_cache.h" #include "db/dbformat.h" #include "db/memtable.h" #include "db/write_batch_internal.h" #include "memtable/stl_wrappers.h" -#include "meta_blocks.h" #include "monitoring/statistics.h" #include "options/options_helper.h" #include "port/port.h" +#include "port/stack_trace.h" #include "rocksdb/cache.h" #include "rocksdb/compression_type.h" #include "rocksdb/db.h" @@ -53,9 +52,11 @@ #include "table/block_based/block_based_table_reader.h" #include "table/block_based/block_builder.h" #include "table/block_based/flush_block_policy.h" +#include "table/block_fetcher.h" #include "table/format.h" #include "table/get_context.h" #include "table/internal_iterator.h" +#include "table/meta_blocks.h" #include "table/plain/plain_table_factory.h" #include "table/scoped_arena_iterator.h" #include "table/sst_file_writer_collectors.h" @@ -1356,10 +1357,8 @@ class FileChecksumTestHelper { uint64_t FileChecksumTestHelper::checksum_uniq_id_ = 1; -INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest, - testing::Values(test::kDefaultFormatVersion)); -INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest, - testing::Values(test::kLatestFormatVersion)); +INSTANTIATE_TEST_CASE_P(FormatVersions, BlockBasedTableTest, + testing::ValuesIn(test::kFooterFormatVersionsToTest)); // This test serves as the living tutorial for the prefix scan of user collected // properties. 
@@ -2228,7 +2227,8 @@ TEST_P(BlockBasedTableTest, BadChecksumType) { const MutableCFOptions new_moptions(options); Status s = c.Reopen(new_ioptions, new_moptions); ASSERT_NOK(s); - ASSERT_MATCHES_REGEX(s.ToString(), "Corruption: unknown checksum type 123.*"); + ASSERT_EQ(s.ToString(), + "Corruption: Corrupt or unsupported checksum type: 123"); } namespace { @@ -4166,106 +4166,107 @@ TEST_P(ParameterizedHarnessTest, SimpleSpecialKey) { } TEST(TableTest, FooterTests) { + Random* r = Random::GetTLSInstance(); + uint64_t data_size = (uint64_t{1} << r->Uniform(40)) + r->Uniform(100); + uint64_t index_size = r->Uniform(1000000000); + uint64_t metaindex_size = r->Uniform(1000000); + // 5 == block trailer size + BlockHandle index(data_size + 5, index_size); + BlockHandle meta_index(data_size + index_size + 2 * 5, metaindex_size); + uint64_t footer_offset = data_size + metaindex_size + index_size + 3 * 5; { // upconvert legacy block based std::string encoded; - Footer footer(kLegacyBlockBasedTableMagicNumber, 0); - BlockHandle meta_index(10, 5), index(20, 15); - footer.set_metaindex_handle(meta_index); - footer.set_index_handle(index); - footer.EncodeTo(&encoded); + Footer footer; + footer.set_table_magic_number(kLegacyBlockBasedTableMagicNumber) + .set_format_version(0) + .set_metaindex_handle(meta_index) + .set_index_handle(index); + footer.EncodeTo(&encoded, footer_offset); Footer decoded_footer; Slice encoded_slice(encoded); - ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice)); + ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); - ASSERT_EQ(decoded_footer.checksum(), kCRC32c); + ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); 
ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); - ASSERT_EQ(decoded_footer.version(), 0U); + ASSERT_EQ(decoded_footer.format_version(), 0U); + ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U); } + // block based, various checksums, various versions for (auto t : GetSupportedChecksums()) { - // block based, various checksums - std::string encoded; - Footer footer(kBlockBasedTableMagicNumber, 1); - BlockHandle meta_index(10, 5), index(20, 15); - footer.set_metaindex_handle(meta_index); - footer.set_index_handle(index); - footer.set_checksum(t); - footer.EncodeTo(&encoded); - Footer decoded_footer; - Slice encoded_slice(encoded); - ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice)); - ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); - ASSERT_EQ(decoded_footer.checksum(), t); - ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); - ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); - ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); - ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); - ASSERT_EQ(decoded_footer.version(), 1U); + for (uint32_t fv = 1; IsSupportedFormatVersion(fv); ++fv) { + std::string encoded; + Footer footer; + footer.set_table_magic_number(kBlockBasedTableMagicNumber) + .set_format_version(fv) + .set_metaindex_handle(meta_index) + .set_index_handle(index) + .set_checksum_type(t); + footer.EncodeTo(&encoded, footer_offset); + Footer decoded_footer; + Slice encoded_slice(encoded); + ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset)); + ASSERT_EQ(decoded_footer.table_magic_number(), + kBlockBasedTableMagicNumber); + ASSERT_EQ(decoded_footer.checksum_type(), t); + ASSERT_EQ(decoded_footer.metaindex_handle().offset(), + meta_index.offset()); + ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); + ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); + 
ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); + ASSERT_EQ(decoded_footer.format_version(), fv); + ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 5U); + } } // Plain table is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE { // upconvert legacy plain table std::string encoded; - Footer footer(kLegacyPlainTableMagicNumber, 0); - BlockHandle meta_index(10, 5), index(20, 15); - footer.set_metaindex_handle(meta_index); - footer.set_index_handle(index); - footer.EncodeTo(&encoded); + Footer footer; + footer.set_table_magic_number(kLegacyPlainTableMagicNumber) + .set_format_version(0) + .set_metaindex_handle(meta_index) + .set_index_handle(index); + footer.EncodeTo(&encoded, footer_offset); Footer decoded_footer; Slice encoded_slice(encoded); - ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice)); + ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); - ASSERT_EQ(decoded_footer.checksum(), kCRC32c); + ASSERT_EQ(decoded_footer.checksum_type(), kCRC32c); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); - ASSERT_EQ(decoded_footer.version(), 0U); + ASSERT_EQ(decoded_footer.format_version(), 0U); + ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U); } { // xxhash plain table (not currently used) std::string encoded; - Footer footer(kPlainTableMagicNumber, 1); - BlockHandle meta_index(10, 5), index(20, 15); - footer.set_metaindex_handle(meta_index); - footer.set_index_handle(index); - footer.set_checksum(kxxHash); - footer.EncodeTo(&encoded); + Footer footer; + footer.set_table_magic_number(kPlainTableMagicNumber) + .set_format_version(1) + .set_metaindex_handle(meta_index) + .set_index_handle(index) + .set_checksum_type(kxxHash); + 
footer.EncodeTo(&encoded, footer_offset); Footer decoded_footer; Slice encoded_slice(encoded); - ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice)); + ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice, footer_offset)); ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber); - ASSERT_EQ(decoded_footer.checksum(), kxxHash); + ASSERT_EQ(decoded_footer.checksum_type(), kxxHash); ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); - ASSERT_EQ(decoded_footer.version(), 1U); + ASSERT_EQ(decoded_footer.format_version(), 1U); + ASSERT_EQ(decoded_footer.GetBlockTrailerSize(), 0U); } #endif // !ROCKSDB_LITE - { - // version == 2 - std::string encoded; - Footer footer(kBlockBasedTableMagicNumber, 2); - BlockHandle meta_index(10, 5), index(20, 15); - footer.set_metaindex_handle(meta_index); - footer.set_index_handle(index); - footer.EncodeTo(&encoded); - Footer decoded_footer; - Slice encoded_slice(encoded); - ASSERT_OK(decoded_footer.DecodeFrom(&encoded_slice)); - ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber); - ASSERT_EQ(decoded_footer.checksum(), kCRC32c); - ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset()); - ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size()); - ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset()); - ASSERT_EQ(decoded_footer.index_handle().size(), index.size()); - ASSERT_EQ(decoded_footer.version(), 2U); - } } class IndexBlockRestartIntervalTest @@ -4786,7 +4787,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { // -- Read properties block BlockHandle properties_handle; - ASSERT_OK(FindOptionalMetaBlock(meta_iter.get(), kPropertiesBlock, + ASSERT_OK(FindOptionalMetaBlock(meta_iter.get(), kPropertiesBlockName, 
&properties_handle)); ASSERT_FALSE(properties_handle.IsNull()); BlockContents properties_contents; @@ -4873,7 +4874,7 @@ TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) { key_at_max_offset = metaindex_iter->key().ToString(); } } - ASSERT_EQ(kPropertiesBlock, key_at_max_offset); + ASSERT_EQ(kPropertiesBlockName, key_at_max_offset); // index handle is stored in footer rather than metaindex block, so need // separate logic to verify it comes before properties block. ASSERT_GT(max_offset, footer.index_handle().offset()); @@ -5369,6 +5370,7 @@ TEST_P( } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } diff --git a/test_util/testutil.cc b/test_util/testutil.cc index 78d09ee0f..0fe789e71 100644 --- a/test_util/testutil.cc +++ b/test_util/testutil.cc @@ -38,7 +38,12 @@ namespace ROCKSDB_NAMESPACE { namespace test { const uint32_t kDefaultFormatVersion = BlockBasedTableOptions().format_version; -const uint32_t kLatestFormatVersion = 5u; +const std::set kFooterFormatVersionsToTest{ + 5U, + // In case any interesting future changes + kDefaultFormatVersion, + kLatestFormatVersion, +}; std::string RandomKey(Random* rnd, int len, RandomKeyType type) { // Make sure to generate a wide variety of characters so we diff --git a/test_util/testutil.h b/test_util/testutil.h index 7edf187f4..a43981cfa 100644 --- a/test_util/testutil.h +++ b/test_util/testutil.h @@ -44,7 +44,7 @@ class SequentialFileReader; namespace test { extern const uint32_t kDefaultFormatVersion; -extern const uint32_t kLatestFormatVersion; +extern const std::set kFooterFormatVersionsToTest; // Return a random key with the specified length that may contain interesting // characters (e.g. \x00, \xff, etc.). 
diff --git a/util/cast_util.h b/util/cast_util.h index c7f515eeb..c91b6ff1e 100644 --- a/util/cast_util.h +++ b/util/cast_util.h @@ -5,6 +5,8 @@ #pragma once +#include + #include "rocksdb/rocksdb_namespace.h" namespace ROCKSDB_NAMESPACE { @@ -20,4 +22,21 @@ inline DestClass* static_cast_with_check(SrcClass* x) { #endif return ret; } + +// A wrapper around static_cast for lossless conversion between integral +// types, including enum types. For example, this can be used for converting +// between signed/unsigned or enum type and underlying type without fear of +// stripping away data, now or in the future. +template +inline To lossless_cast(From x) { + using FromValue = typename std::remove_reference::type; + static_assert( + std::is_integral::value || std::is_enum::value, + "Only works on integral types"); + static_assert(std::is_integral::value || std::is_enum::value, + "Only works on integral types"); + static_assert(sizeof(To) >= sizeof(FromValue), "Must be lossless"); + return static_cast(x); +} + } // namespace ROCKSDB_NAMESPACE diff --git a/util/coding.h b/util/coding.h index 876f181f1..72f63bc6b 100644 --- a/util/coding.h +++ b/util/coding.h @@ -31,7 +31,7 @@ namespace ROCKSDB_NAMESPACE { // The maximum length of a varint in bytes for 64-bit. -const unsigned int kMaxVarint64Length = 10; +const uint32_t kMaxVarint64Length = 10; // Standard Put... routines append to a string extern void PutFixed16(std::string* dst, uint16_t value);