From 402fe7d4695daa3c52f99a2383cc28245d58c338 Mon Sep 17 00:00:00 2001 From: anand76 Date: Fri, 5 Jun 2020 11:06:26 -0700 Subject: [PATCH] Check iterator status BlockBasedTableReader::VerifyChecksumInBlocks() (#6909) Summary: The ```for``` loop in ```VerifyChecksumInBlocks``` only checks ```index_iter->Valid()``` which could be ```false``` either due to reaching the end of the index or, in case of partitioned index, it could be due to a checksum mismatch error when reading a 2nd level index block. Instead of throwing away the index iterator status, we need to return any errors back to the caller. Tests: Add a test in block_based_table_reader_test.cc. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6909 Reviewed By: pdillinger Differential Revision: D21833922 Pulled By: anand1976 fbshipit-source-id: bc778ebf1121dbbdd768689de5183f07a9f0beae --- HISTORY.md | 4 + db/corruption_test.cc | 42 +-- table/block_based/block_based_table_reader.cc | 6 + table/block_based/block_based_table_reader.h | 1 + .../block_based_table_reader_test.cc | 341 ++++++++++++++++++ .../block_based/partitioned_index_iterator.h | 1 + test_util/testutil.cc | 42 +++ test_util/testutil.h | 2 + 8 files changed, 400 insertions(+), 39 deletions(-) create mode 100644 table/block_based/block_based_table_reader_test.cc diff --git a/HISTORY.md b/HISTORY.md index 7ddfbb835..399d87714 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,8 @@ # Rocksdb Change Log +## 6.10.2 (6/5/2020) +### Bug fix +* Fix false negative from the VerifyChecksum() API when there is a checksum mismatch in an index partition block in a BlockBasedTable format table file (index_type is kTwoLevelIndexSearch). + ## 6.10.1 (5/27/2020) ### Bug fix * Remove "u''" in TARGETS file. diff --git a/db/corruption_test.cc b/db/corruption_test.cc index da020967d..7a2498863 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -157,42 +157,6 @@ class CorruptionTest : public testing::Test { ASSERT_GE(max_expected, correct); } - void CorruptFile(const std::string& fname, int offset, int bytes_to_corrupt) { - struct stat sbuf; - if (stat(fname.c_str(), &sbuf) != 0) { - const char* msg = strerror(errno); - FAIL() << fname << ": " << msg; - } - - if (offset < 0) { - // Relative to end of file; make it absolute - if (-offset > sbuf.st_size) { - offset = 0; - } else { - offset = static_cast(sbuf.st_size + offset); - } - } - if (offset > sbuf.st_size) { - offset = static_cast(sbuf.st_size); - } - if (offset + bytes_to_corrupt > sbuf.st_size) { - bytes_to_corrupt = static_cast(sbuf.st_size - offset); - } - - // Do it - std::string contents; - Status s = ReadFileToString(Env::Default(), fname, &contents); - ASSERT_TRUE(s.ok()) << s.ToString(); - for (int i = 0; i < bytes_to_corrupt; i++) { - contents[i + offset] ^= 0x80; - } - s = WriteStringToFile(Env::Default(), contents, fname); - ASSERT_TRUE(s.ok()) << s.ToString(); - Options options; - EnvOptions env_options; - ASSERT_NOK(VerifySstFileChecksum(options, env_options, fname)); - } - void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) { // Pick file to corrupt std::vector filenames; @@ -211,7 +175,7 @@ class CorruptionTest : public testing::Test { } ASSERT_TRUE(!fname.empty()) << filetype; - CorruptFile(fname, offset, bytes_to_corrupt); + test::CorruptFile(fname, offset, bytes_to_corrupt); } // corrupts exactly one file at level `level`. if no file found at level, @@ -221,7 +185,7 @@ class CorruptionTest : public testing::Test { db_->GetLiveFilesMetaData(&metadata); for (const auto& m : metadata) { if (m.level == level) { - CorruptFile(dbname_ + "/" + m.name, offset, bytes_to_corrupt); + test::CorruptFile(dbname_ + "/" + m.name, offset, bytes_to_corrupt); return; } } @@ -556,7 +520,7 @@ TEST_F(CorruptionTest, RangeDeletionCorrupted) { ImmutableCFOptions(options_), kRangeDelBlock, &range_del_handle)); ASSERT_OK(TryReopen()); - CorruptFile(filename, static_cast(range_del_handle.offset()), 1); + test::CorruptFile(filename, static_cast(range_del_handle.offset()), 1); ASSERT_TRUE(TryReopen().IsCorruption()); } diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 303484e4a..4b40b0c4d 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -2818,6 +2818,12 @@ Status BlockBasedTable::VerifyChecksumInBlocks( break; } } + if (s.ok()) { + // In the case of two level indexes, we would have exited the above loop + // by checking index_iter->Valid(), but Valid() might have returned false + // due to an IO error. So check the index_iter status + s = index_iter->status(); + } return s; } diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index d2aca6cd4..d595f8a65 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -252,6 +252,7 @@ class BlockBasedTable : public TableReader { private: friend class MockedBlockBasedTable; + friend class BlockBasedTableReaderTestVerifyChecksum_ChecksumMismatch_Test; static std::atomic next_cache_key_id_; BlockCacheTracer* const block_cache_tracer_; diff --git a/table/block_based/block_based_table_reader_test.cc b/table/block_based/block_based_table_reader_test.cc new file mode 100644 index 000000000..194010fcf --- /dev/null +++ b/table/block_based/block_based_table_reader_test.cc @@ -0,0 +1,341 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#include "table/block_based/block_based_table_reader.h" +#include "rocksdb/file_system.h" +#include "table/block_based/partitioned_index_iterator.h" + +#include "db/table_properties_collector.h" +#include "options/options_helper.h" +#include "port/port.h" +#include "port/stack_trace.h" +#include "table/block_based/block_based_table_builder.h" +#include "table/block_based/block_based_table_factory.h" +#include "table/format.h" +#include "test_util/testharness.h" +#include "test_util/testutil.h" + +namespace ROCKSDB_NAMESPACE { + +class BlockBasedTableReaderTest + : public testing::Test, + public testing::WithParamInterface> { + protected: + CompressionType compression_type_; + bool use_direct_reads_; + + void SetUp() override { + BlockBasedTableOptions::IndexType index_type; + bool no_block_cache; + std::tie(compression_type_, use_direct_reads_, index_type, no_block_cache) = + GetParam(); + + test::SetupSyncPointsToMockDirectIO(); + test_dir_ = test::PerThreadDBPath("block_based_table_reader_test"); + env_ = Env::Default(); + fs_ = FileSystem::Default(); + ASSERT_OK(fs_->CreateDir(test_dir_, IOOptions(), nullptr)); + + BlockBasedTableOptions opts; + opts.index_type = index_type; + opts.no_block_cache = no_block_cache; + table_factory_.reset( + static_cast(NewBlockBasedTableFactory(opts))); + } + + void TearDown() override { EXPECT_OK(test::DestroyDir(env_, test_dir_)); } + + // Creates a table with the specificied key value pairs (kv). + void CreateTable(const std::string& table_name, + const CompressionType& compression_type, + const std::map& kv) { + std::unique_ptr writer; + NewFileWriter(table_name, &writer); + + // Create table builder. + Options options; + ImmutableCFOptions ioptions(options); + InternalKeyComparator comparator(options.comparator); + ColumnFamilyOptions cf_options; + MutableCFOptions moptions(cf_options); + std::vector> factories; + std::unique_ptr table_builder(table_factory_->NewTableBuilder( + TableBuilderOptions(ioptions, moptions, comparator, &factories, + compression_type, 0 /* sample_for_compression */, + CompressionOptions(), false /* skip_filters */, + kDefaultColumnFamilyName, -1 /* level */), + 0 /* column_family_id */, writer.get())); + + // Build table. + for (auto it = kv.begin(); it != kv.end(); it++) { + std::string k = ToInternalKey(it->first); + std::string v = it->second; + table_builder->Add(k, v); + } + ASSERT_OK(table_builder->Finish()); + } + + void NewBlockBasedTableReader(const FileOptions& foptions, + const ImmutableCFOptions& ioptions, + const InternalKeyComparator& comparator, + const std::string& table_name, + std::unique_ptr* table) { + std::unique_ptr file; + NewFileReader(table_name, foptions, &file); + + uint64_t file_size = 0; + ASSERT_OK(env_->GetFileSize(Path(table_name), &file_size)); + + std::unique_ptr table_reader; + ASSERT_OK(BlockBasedTable::Open(ioptions, EnvOptions(), + table_factory_->table_options(), comparator, + std::move(file), file_size, &table_reader)); + + table->reset(reinterpret_cast(table_reader.release())); + } + + std::string Path(const std::string& fname) { return test_dir_ + "/" + fname; } + + const std::shared_ptr& fs() const { return fs_; } + + private: + std::string test_dir_; + Env* env_; + std::shared_ptr fs_; + std::unique_ptr table_factory_; + + void WriteToFile(const std::string& content, const std::string& filename) { + std::unique_ptr f; + ASSERT_OK(fs_->NewWritableFile(Path(filename), FileOptions(), &f, nullptr)); + ASSERT_OK(f->Append(content, IOOptions(), nullptr)); + ASSERT_OK(f->Close(IOOptions(), nullptr)); + } + + void NewFileWriter(const std::string& filename, + std::unique_ptr* writer) { + std::string path = Path(filename); + EnvOptions env_options; + FileOptions foptions; + std::unique_ptr file; + ASSERT_OK(fs_->NewWritableFile(path, foptions, &file, nullptr)); + writer->reset(new WritableFileWriter(std::move(file), path, env_options)); + } + + void NewFileReader(const std::string& filename, const FileOptions& opt, + std::unique_ptr* reader) { + std::string path = Path(filename); + std::unique_ptr f; + ASSERT_OK(fs_->NewRandomAccessFile(path, opt, &f, nullptr)); + reader->reset(new RandomAccessFileReader(std::move(f), path, env_)); + } + + std::string ToInternalKey(const std::string& key) { + InternalKey internal_key(key, 0, ValueType::kTypeValue); + return internal_key.Encode().ToString(); + } +}; + +// Tests MultiGet in both direct IO and non-direct IO mode. +// The keys should be in cache after MultiGet. +TEST_P(BlockBasedTableReaderTest, MultiGet) { + // Prepare key-value pairs to occupy multiple blocks. + // Each value is 256B, every 16 pairs constitute 1 block. + // Adjacent blocks contain values with different compression complexity: + // human readable strings are easier to compress than random strings. + std::map kv; + { + Random rnd(101); + uint32_t key = 0; + for (int block = 0; block < 100; block++) { + for (int i = 0; i < 16; i++) { + char k[9] = {0}; + // Internal key is constructed directly from this key, + // and internal key size is required to be >= 8 bytes, + // so use %08u as the format string. + sprintf(k, "%08u", key); + std::string v; + if (block % 2) { + v = test::RandomHumanReadableString(&rnd, 256); + } else { + test::RandomString(&rnd, 256, &v); + } + kv[std::string(k)] = v; + key++; + } + } + } + + // Prepare keys, values, and statuses for MultiGet. + autovector keys; + autovector values; + autovector statuses; + { + const int step = + static_cast(kv.size()) / MultiGetContext::MAX_BATCH_SIZE; + auto it = kv.begin(); + for (int i = 0; i < MultiGetContext::MAX_BATCH_SIZE; i++) { + keys.emplace_back(it->first); + values.emplace_back(); + statuses.emplace_back(); + std::advance(it, step); + } + } + + std::string table_name = + "BlockBasedTableReaderTest" + CompressionTypeToString(compression_type_); + CreateTable(table_name, compression_type_, kv); + + std::unique_ptr table; + Options options; + ImmutableCFOptions ioptions(options); + FileOptions foptions; + foptions.use_direct_reads = use_direct_reads_; + InternalKeyComparator comparator(options.comparator); + NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table); + + // Ensure that keys are not in cache before MultiGet. + for (auto& key : keys) { + ASSERT_FALSE(table->TEST_KeyInCache(ReadOptions(), key)); + } + + // Prepare MultiGetContext. + autovector get_context; + autovector key_context; + autovector sorted_keys; + for (size_t i = 0; i < keys.size(); ++i) { + get_context.emplace_back( + BytewiseComparator(), nullptr, nullptr, nullptr, GetContext::kNotFound, + keys[i], &values[i], nullptr, nullptr, nullptr, true /* do_merge */, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr); + key_context.emplace_back(nullptr, keys[i], &values[i], nullptr, + &statuses.back()); + key_context.back().get_context = &get_context.back(); + } + for (auto& key_ctx : key_context) { + sorted_keys.emplace_back(&key_ctx); + } + MultiGetContext ctx(&sorted_keys, 0, sorted_keys.size(), 0, ReadOptions()); + + // Execute MultiGet. + MultiGetContext::Range range = ctx.GetMultiGetRange(); + table->MultiGet(ReadOptions(), &range, nullptr); + + for (const Status& status : statuses) { + ASSERT_OK(status); + } + // Check that keys are in cache after MultiGet. + for (size_t i = 0; i < keys.size(); i++) { + ASSERT_TRUE(table->TEST_KeyInCache(ReadOptions(), keys[i])); + ASSERT_EQ(values[i].ToString(), kv[keys[i].ToString()]); + } +} + +class BlockBasedTableReaderTestVerifyChecksum + : public BlockBasedTableReaderTest { + public: + BlockBasedTableReaderTestVerifyChecksum() : BlockBasedTableReaderTest() {} +}; + +TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) { + // Prepare key-value pairs to occupy multiple blocks. + // Each value is 256B, every 16 pairs constitute 1 block. + // Adjacent blocks contain values with different compression complexity: + // human readable strings are easier to compress than random strings. + Random rnd(101); + std::map kv; + { + uint32_t key = 0; + for (int block = 0; block < 800; block++) { + for (int i = 0; i < 16; i++) { + char k[9] = {0}; + // Internal key is constructed directly from this key, + // and internal key size is required to be >= 8 bytes, + // so use %08u as the format string. + sprintf(k, "%08u", key); + std::string v; + test::RandomString(&rnd, 256, &v); + kv[std::string(k)] = v; + key++; + } + } + } + + std::string table_name = + "BlockBasedTableReaderTest" + CompressionTypeToString(compression_type_); + CreateTable(table_name, compression_type_, kv); + + std::unique_ptr table; + Options options; + ImmutableCFOptions ioptions(options); + FileOptions foptions; + foptions.use_direct_reads = use_direct_reads_; + InternalKeyComparator comparator(options.comparator); + NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table); + + // Use the top level iterator to find the offset/size of the first + // 2nd level index block and corrupt the block + IndexBlockIter iiter_on_stack; + BlockCacheLookupContext context{TableReaderCaller::kUserVerifyChecksum}; + InternalIteratorBase* iiter = table->NewIndexIterator( + ReadOptions(), /*disable_prefix_seek=*/false, &iiter_on_stack, + /*get_context=*/nullptr, &context); + std::unique_ptr> iiter_unique_ptr; + if (iiter != &iiter_on_stack) { + iiter_unique_ptr = std::unique_ptr>(iiter); + } + ASSERT_OK(iiter->status()); + iiter->SeekToFirst(); + BlockHandle handle = static_cast(iiter) + ->index_iter_->value() + .handle; + table.reset(); + + // Corrupt the block pointed to by handle + test::CorruptFile(Path(table_name), static_cast(handle.offset()), 128); + + NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table); + Status s = table->VerifyChecksum(ReadOptions(), + TableReaderCaller::kUserVerifyChecksum); + ASSERT_EQ(s.code(), Status::kCorruption); +} + +// Param 1: compression type +// Param 2: whether to use direct reads +// Param 3: Block Based Table Index type +// Param 4: BBTO no_block_cache option +#ifdef ROCKSDB_LITE +// Skip direct I/O tests in lite mode since direct I/O is unsupported. +INSTANTIATE_TEST_CASE_P( + MultiGet, BlockBasedTableReaderTest, + ::testing::Combine( + ::testing::ValuesIn(GetSupportedCompressions()), + ::testing::Values(false), + ::testing::Values(BlockBasedTableOptions::IndexType::kBinarySearch), + ::testing::Values(false))); +#else // ROCKSDB_LITE +INSTANTIATE_TEST_CASE_P( + MultiGet, BlockBasedTableReaderTest, + ::testing::Combine( + ::testing::ValuesIn(GetSupportedCompressions()), ::testing::Bool(), + ::testing::Values(BlockBasedTableOptions::IndexType::kBinarySearch), + ::testing::Values(false))); +#endif // ROCKSDB_LITE +INSTANTIATE_TEST_CASE_P( + VerifyChecksum, BlockBasedTableReaderTestVerifyChecksum, + ::testing::Combine( + ::testing::ValuesIn(GetSupportedCompressions()), + ::testing::Values(false), + ::testing::Values( + BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch), + ::testing::Values(true))); + +} // namespace ROCKSDB_NAMESPACE + +int main(int argc, char** argv) { + ROCKSDB_NAMESPACE::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/table/block_based/partitioned_index_iterator.h b/table/block_based/partitioned_index_iterator.h index ea1941e67..747c0553a 100644 --- a/table/block_based/partitioned_index_iterator.h +++ b/table/block_based/partitioned_index_iterator.h @@ -122,6 +122,7 @@ class ParititionedIndexIterator : public InternalIteratorBase { } private: + friend class BlockBasedTableReaderTestVerifyChecksum_ChecksumMismatch_Test; const BlockBasedTable* table_; const ReadOptions read_options_; #ifndef NDEBUG diff --git a/test_util/testutil.cc b/test_util/testutil.cc index 4c6ef0999..9e34ac5a4 100644 --- a/test_util/testutil.cc +++ b/test_util/testutil.cc @@ -10,6 +10,7 @@ #include "test_util/testutil.h" #include +#include #include #include #include @@ -540,5 +541,46 @@ void SetupSyncPointsToMockDirectIO() { #endif } +void CorruptFile(const std::string& fname, int offset, int bytes_to_corrupt) { + struct stat sbuf; + if (stat(fname.c_str(), &sbuf) != 0) { + // strerror is not thread-safe so should not be used in the "passing" path + // of unit tests (sometimes parallelized) but is OK here where test fails + const char* msg = strerror(errno); + fprintf(stderr, "%s:%s\n", fname.c_str(), msg); + assert(false); + } + + if (offset < 0) { + // Relative to end of file; make it absolute + if (-offset > sbuf.st_size) { + offset = 0; + } else { + offset = static_cast(sbuf.st_size + offset); + } + } + if (offset > sbuf.st_size) { + offset = static_cast(sbuf.st_size); + } + if (offset + bytes_to_corrupt > sbuf.st_size) { + bytes_to_corrupt = static_cast(sbuf.st_size - offset); + } + + // Do it + std::string contents; + Status s = ReadFileToString(Env::Default(), fname, &contents); + assert(s.ok()); + for (int i = 0; i < bytes_to_corrupt; i++) { + contents[i + offset] ^= 0x80; + } + s = WriteStringToFile(Env::Default(), contents, fname); + assert(s.ok()); + Options options; + EnvOptions env_options; +#ifndef ROCKSDB_LITE + assert(!VerifySstFileChecksum(options, env_options, fname).ok()); +#endif +} + } // namespace test } // namespace ROCKSDB_NAMESPACE diff --git a/test_util/testutil.h b/test_util/testutil.h index 83779b538..ee7ec7c83 100644 --- a/test_util/testutil.h +++ b/test_util/testutil.h @@ -809,5 +809,7 @@ void ResetTmpDirForDirectIO(); // to the file system. void SetupSyncPointsToMockDirectIO(); +void CorruptFile(const std::string& fname, int offset, int bytes_to_corrupt); + } // namespace test } // namespace ROCKSDB_NAMESPACE