Verify checksum before ingestion (#4916)
Summary: before file ingestion (in preparation phase), verify the checksums of the blocks of the external SST file, including properties block with global seqno. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4916 Differential Revision: D13863501 Pulled By: riversand963 fbshipit-source-id: dc54697f970e3807832e2460f7228fcc7efe81ee
This commit is contained in:
parent
d0d484b132
commit
158da7a6ee
@ -6,6 +6,7 @@
|
||||
* RocksDB may choose to preopen some files even if options.max_open_files != -1. This may make DB open slightly longer.
|
||||
* For users of dictionary compression with ZSTD v0.7.0+, we now reuse the same digested dictionary when compressing each of an SST file's data blocks for faster compression speeds.
|
||||
* For all users of dictionary compression who set `cache_index_and_filter_blocks == true`, we now store dictionary data used for decompression in the block cache for better control over memory usage. For users of ZSTD v1.1.4+ who compile with -DZSTD_STATIC_LINKING_ONLY, this includes a digested dictionary, which is used to increase decompression speed.
|
||||
* Add support for block checksums verification for external SST files before ingestion.
|
||||
|
||||
### Public API Change
|
||||
* CompactionPri = kMinOverlappingRatio also uses compensated file size, which boosts file with lots of tombstones to be compacted first.
|
||||
|
@ -14,8 +14,9 @@
|
||||
namespace rocksdb {
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
class ExternalSSTFileBasicTest : public DBTestBase,
|
||||
public ::testing::WithParamInterface<bool> {
|
||||
class ExternalSSTFileBasicTest
|
||||
: public DBTestBase,
|
||||
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
|
||||
public:
|
||||
ExternalSSTFileBasicTest() : DBTestBase("/external_sst_file_basic_test") {
|
||||
sst_files_dir_ = dbname_ + "/sst_files/";
|
||||
@ -42,7 +43,8 @@ class ExternalSSTFileBasicTest : public DBTestBase,
|
||||
const Options options, std::vector<int> keys,
|
||||
const std::vector<ValueType>& value_types,
|
||||
std::vector<std::pair<int, int>> range_deletions, int file_id,
|
||||
bool write_global_seqno, std::map<std::string, std::string>* true_data) {
|
||||
bool write_global_seqno, bool verify_checksums_before_ingest,
|
||||
std::map<std::string, std::string>* true_data) {
|
||||
assert(value_types.size() == 1 || keys.size() == value_types.size());
|
||||
std::string file_path = sst_files_dir_ + ToString(file_id);
|
||||
SstFileWriter sst_file_writer(EnvOptions(), options);
|
||||
@ -107,6 +109,7 @@ class ExternalSSTFileBasicTest : public DBTestBase,
|
||||
IngestExternalFileOptions ifo;
|
||||
ifo.allow_global_seqno = true;
|
||||
ifo.write_global_seqno = write_global_seqno;
|
||||
ifo.verify_checksums_before_ingest = verify_checksums_before_ingest;
|
||||
s = db_->IngestExternalFile({file_path}, ifo);
|
||||
}
|
||||
return s;
|
||||
@ -115,18 +118,20 @@ class ExternalSSTFileBasicTest : public DBTestBase,
|
||||
Status GenerateAndAddExternalFile(
|
||||
const Options options, std::vector<int> keys,
|
||||
const std::vector<ValueType>& value_types, int file_id,
|
||||
bool write_global_seqno, std::map<std::string, std::string>* true_data) {
|
||||
return GenerateAndAddExternalFile(options, keys, value_types, {}, file_id,
|
||||
write_global_seqno, true_data);
|
||||
bool write_global_seqno, bool verify_checksums_before_ingest,
|
||||
std::map<std::string, std::string>* true_data) {
|
||||
return GenerateAndAddExternalFile(
|
||||
options, keys, value_types, {}, file_id, write_global_seqno,
|
||||
verify_checksums_before_ingest, true_data);
|
||||
}
|
||||
|
||||
Status GenerateAndAddExternalFile(
|
||||
const Options options, std::vector<int> keys, const ValueType value_type,
|
||||
int file_id, bool write_global_seqno,
|
||||
int file_id, bool write_global_seqno, bool verify_checksums_before_ingest,
|
||||
std::map<std::string, std::string>* true_data) {
|
||||
return GenerateAndAddExternalFile(options, keys,
|
||||
std::vector<ValueType>(1, value_type),
|
||||
file_id, write_global_seqno, true_data);
|
||||
return GenerateAndAddExternalFile(
|
||||
options, keys, std::vector<ValueType>(1, value_type), file_id,
|
||||
write_global_seqno, verify_checksums_before_ingest, true_data);
|
||||
}
|
||||
|
||||
~ExternalSSTFileBasicTest() { test::DestroyDir(env_, sst_files_dir_); }
|
||||
@ -249,7 +254,8 @@ TEST_F(ExternalSSTFileBasicTest, NoCopy) {
|
||||
}
|
||||
|
||||
TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
|
||||
bool write_global_seqno = GetParam();
|
||||
bool write_global_seqno = std::get<0>(GetParam());
|
||||
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
do {
|
||||
Options options = CurrentOptions();
|
||||
DestroyAndReopen(options);
|
||||
@ -257,39 +263,39 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
|
||||
|
||||
int file_id = 1;
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2, 3, 4, 5, 6},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {10, 11, 12, 13},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 4, 6},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 4, 6}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {11, 15, 19},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {11, 15, 19}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {120, 130},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {120, 130}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 130},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 130}, ValueType::kTypeValue, file_id++, write_global_seqno,
|
||||
verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
|
||||
|
||||
@ -300,21 +306,21 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
|
||||
}
|
||||
SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber();
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {60, 61, 62},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {60, 61, 62}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {40, 41, 42},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {40, 41, 42}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {20, 30, 40},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {20, 30, 40}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2);
|
||||
|
||||
@ -322,29 +328,29 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
|
||||
|
||||
// We will need a seqno for the file regardless if the file overwrite
|
||||
// keys in the DB or not because we have a snapshot
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1000, 1002},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1000, 1002}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// A global seqno will be assigned anyway because of the snapshot
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {2000, 3002},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {2000, 3002}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// A global seqno will be assigned anyway because of the snapshot
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 20, 40, 100, 150},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 20, 40, 100, 150}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// A global seqno will be assigned anyway because of the snapshot
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
|
||||
|
||||
db_->ReleaseSnapshot(snapshot);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {5000, 5001},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {5000, 5001}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// No snapshot anymore, no need to assign a seqno
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
|
||||
|
||||
@ -354,7 +360,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithGlobalSeqnoPickedSeqno) {
|
||||
}
|
||||
|
||||
TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
|
||||
bool write_global_seqno = GetParam();
|
||||
bool write_global_seqno = std::get<0>(GetParam());
|
||||
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
do {
|
||||
Options options = CurrentOptions();
|
||||
options.merge_operator.reset(new TestPutOperator());
|
||||
@ -363,59 +370,59 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
|
||||
|
||||
int file_id = 1;
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2, 3, 4, 5, 6},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {10, 11, 12, 13},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {10, 11, 12, 13}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 4, 6},
|
||||
ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 4, 6}, ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {11, 15, 19},
|
||||
ValueType::kTypeDeletion, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {11, 15, 19}, ValueType::kTypeDeletion, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {120, 130},
|
||||
ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {120, 130}, ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 130},
|
||||
ValueType::kTypeDeletion, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 130}, ValueType::kTypeDeletion, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {120}, {ValueType::kTypeValue}, {{120, 135}}, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {}, {}, {{110, 120}},
|
||||
file_id++, write_global_seqno,
|
||||
&true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {}, {}, {{110, 120}}, file_id++, write_global_seqno,
|
||||
verify_checksums_before_ingest, &true_data));
|
||||
// The range deletion ends on a key, but it doesn't actually delete
|
||||
// this key because the largest key in the range is exclusive. Still,
|
||||
// it counts as an overlap so a new seqno will be assigned.
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {}, {}, {{100, 109}},
|
||||
file_id++, write_global_seqno,
|
||||
&true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {}, {}, {{100, 109}}, file_id++, write_global_seqno,
|
||||
verify_checksums_before_ingest, &true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
|
||||
|
||||
@ -426,21 +433,21 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
|
||||
}
|
||||
SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber();
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {60, 61, 62},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {60, 61, 62}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {40, 41, 42},
|
||||
ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {40, 41, 42}, ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {20, 30, 40},
|
||||
ValueType::kTypeDeletion, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {20, 30, 40}, ValueType::kTypeDeletion, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2);
|
||||
|
||||
@ -448,29 +455,29 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
|
||||
|
||||
// We will need a seqno for the file regardless if the file overwrite
|
||||
// keys in the DB or not because we have a snapshot
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1000, 1002},
|
||||
ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1000, 1002}, ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// A global seqno will be assigned anyway because of the snapshot
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {2000, 3002},
|
||||
ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {2000, 3002}, ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// A global seqno will be assigned anyway because of the snapshot
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 20, 40, 100, 150},
|
||||
ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 20, 40, 100, 150}, ValueType::kTypeMerge, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// A global seqno will be assigned anyway because of the snapshot
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
|
||||
|
||||
db_->ReleaseSnapshot(snapshot);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {5000, 5001},
|
||||
ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {5000, 5001}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data));
|
||||
// No snapshot anymore, no need to assign a seqno
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
|
||||
|
||||
@ -480,7 +487,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMultipleValueType) {
|
||||
}
|
||||
|
||||
TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
bool write_global_seqno = GetParam();
|
||||
bool write_global_seqno = std::get<0>(GetParam());
|
||||
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
do {
|
||||
Options options = CurrentOptions();
|
||||
options.merge_operator.reset(new TestPutOperator());
|
||||
@ -493,7 +501,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
options, {1, 2, 3, 4, 5, 6},
|
||||
{ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue,
|
||||
ValueType::kTypeMerge, ValueType::kTypeValue, ValueType::kTypeMerge},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
|
||||
|
||||
@ -501,7 +510,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
options, {10, 11, 12, 13},
|
||||
{ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue,
|
||||
ValueType::kTypeMerge},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 0);
|
||||
|
||||
@ -509,7 +519,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
options, {1, 4, 6},
|
||||
{ValueType::kTypeDeletion, ValueType::kTypeValue,
|
||||
ValueType::kTypeMerge},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 1);
|
||||
|
||||
@ -517,19 +528,22 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
options, {11, 15, 19},
|
||||
{ValueType::kTypeDeletion, ValueType::kTypeMerge,
|
||||
ValueType::kTypeValue},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {120, 130}, {ValueType::kTypeValue, ValueType::kTypeMerge},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 2);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 130}, {ValueType::kTypeMerge, ValueType::kTypeDeletion},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
|
||||
|
||||
@ -537,14 +551,16 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
options, {150, 151, 152},
|
||||
{ValueType::kTypeValue, ValueType::kTypeMerge,
|
||||
ValueType::kTypeDeletion},
|
||||
{{150, 160}, {180, 190}}, file_id++, write_global_seqno, &true_data));
|
||||
{{150, 160}, {180, 190}}, file_id++, write_global_seqno,
|
||||
verify_checksums_before_ingest, &true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 3);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {150, 151, 152},
|
||||
{ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue},
|
||||
{{200, 250}}, file_id++, write_global_seqno, &true_data));
|
||||
{{200, 250}}, file_id++, write_global_seqno,
|
||||
verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 4);
|
||||
|
||||
@ -552,7 +568,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
options, {300, 301, 302},
|
||||
{ValueType::kTypeValue, ValueType::kTypeMerge,
|
||||
ValueType::kTypeDeletion},
|
||||
{{1, 2}, {152, 154}}, file_id++, write_global_seqno, &true_data));
|
||||
{{1, 2}, {152, 154}}, file_id++, write_global_seqno,
|
||||
verify_checksums_before_ingest, &true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), 5);
|
||||
|
||||
@ -566,7 +583,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {60, 61, 62},
|
||||
{ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeValue},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// File doesn't overwrite any keys, no seqno needed
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
|
||||
|
||||
@ -574,7 +592,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
options, {40, 41, 42},
|
||||
{ValueType::kTypeValue, ValueType::kTypeDeletion,
|
||||
ValueType::kTypeDeletion},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 1);
|
||||
|
||||
@ -582,7 +601,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
options, {20, 30, 40},
|
||||
{ValueType::kTypeDeletion, ValueType::kTypeDeletion,
|
||||
ValueType::kTypeDeletion},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// File overwrites some keys, a seqno will be assigned
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 2);
|
||||
|
||||
@ -592,13 +612,15 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
// keys in the DB or not because we have a snapshot
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1000, 1002}, {ValueType::kTypeValue, ValueType::kTypeMerge},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// A global seqno will be assigned anyway because of the snapshot
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 3);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {2000, 3002}, {ValueType::kTypeValue, ValueType::kTypeMerge},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// A global seqno will be assigned anyway because of the snapshot
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 4);
|
||||
|
||||
@ -606,7 +628,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
options, {1, 20, 40, 100, 150},
|
||||
{ValueType::kTypeDeletion, ValueType::kTypeDeletion,
|
||||
ValueType::kTypeValue, ValueType::kTypeMerge, ValueType::kTypeMerge},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// A global seqno will be assigned anyway because of the snapshot
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
|
||||
|
||||
@ -614,7 +637,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithMixedValueType) {
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {5000, 5001}, {ValueType::kTypeValue, ValueType::kTypeMerge},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
// No snapshot anymore, no need to assign a seqno
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno + 5);
|
||||
|
||||
@ -690,13 +714,15 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
|
||||
ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2));
|
||||
ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 1));
|
||||
|
||||
bool write_global_seqno = GetParam();
|
||||
bool write_global_seqno = std::get<0>(GetParam());
|
||||
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
// overlaps with L0 file but not memtable, so flush is skipped and file is
|
||||
// ingested into L0
|
||||
SequenceNumber last_seqno = dbfull()->GetLatestSequenceNumber();
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {60, 90}, {ValueType::kTypeValue, ValueType::kTypeValue},
|
||||
{{65, 70}, {70, 85}}, file_id++, write_global_seqno, &true_data));
|
||||
{{65, 70}, {70, 85}}, file_id++, write_global_seqno,
|
||||
verify_checksums_before_ingest, &true_data));
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
|
||||
ASSERT_EQ(2, NumTableFilesAtLevel(0));
|
||||
ASSERT_EQ(0, NumTableFilesAtLevel(kNumLevels - 2));
|
||||
@ -706,7 +732,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
|
||||
// file is ingested into L5
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {10, 40}, {ValueType::kTypeValue, ValueType::kTypeValue},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
|
||||
ASSERT_EQ(2, NumTableFilesAtLevel(0));
|
||||
ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
|
||||
@ -714,8 +741,9 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
|
||||
|
||||
// overlaps with L5 file but not memtable or L0 file, so flush is skipped and
|
||||
// file is ingested into L4
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {}, {}, {{5, 15}}, file_id++,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {}, {}, {{5, 15}}, file_id++, write_global_seqno,
|
||||
verify_checksums_before_ingest, &true_data));
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
|
||||
ASSERT_EQ(2, NumTableFilesAtLevel(0));
|
||||
ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
|
||||
@ -727,7 +755,8 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
|
||||
// count increases by two.
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {100, 140}, {ValueType::kTypeValue, ValueType::kTypeValue},
|
||||
file_id++, write_global_seqno, &true_data));
|
||||
file_id++, write_global_seqno, verify_checksums_before_ingest,
|
||||
&true_data));
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), ++last_seqno);
|
||||
ASSERT_EQ(4, NumTableFilesAtLevel(0));
|
||||
ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
|
||||
@ -740,15 +769,101 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
|
||||
// seqnum.
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {151, 175}, {ValueType::kTypeValue, ValueType::kTypeValue},
|
||||
{{160, 200}}, file_id++, write_global_seqno, &true_data));
|
||||
{{160, 200}}, file_id++, write_global_seqno,
|
||||
verify_checksums_before_ingest, &true_data));
|
||||
ASSERT_EQ(dbfull()->GetLatestSequenceNumber(), last_seqno);
|
||||
ASSERT_EQ(4, NumTableFilesAtLevel(0));
|
||||
ASSERT_EQ(1, NumTableFilesAtLevel(kNumLevels - 2));
|
||||
ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1));
|
||||
}
|
||||
|
||||
TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) {
|
||||
bool change_checksum_called = false;
|
||||
const auto& change_checksum = [&](void* arg) {
|
||||
if (!change_checksum_called) {
|
||||
char* buf = reinterpret_cast<char*>(arg);
|
||||
assert(nullptr != buf);
|
||||
buf[0] ^= 0x1;
|
||||
change_checksum_called = true;
|
||||
}
|
||||
};
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
SyncPoint::GetInstance()->ClearAllCallBacks();
|
||||
SyncPoint::GetInstance()->SetCallBack(
|
||||
"BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum",
|
||||
change_checksum);
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
int file_id = 0;
|
||||
bool write_global_seqno = std::get<0>(GetParam());
|
||||
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
do {
|
||||
Options options = CurrentOptions();
|
||||
DestroyAndReopen(options);
|
||||
std::map<std::string, std::string> true_data;
|
||||
Status s = GenerateAndAddExternalFile(
|
||||
options, {1, 2, 3, 4, 5, 6}, ValueType::kTypeValue, file_id++,
|
||||
write_global_seqno, verify_checksums_before_ingest, &true_data);
|
||||
if (verify_checksums_before_ingest) {
|
||||
ASSERT_NOK(s);
|
||||
} else {
|
||||
ASSERT_OK(s);
|
||||
}
|
||||
change_checksum_called = false;
|
||||
} while (ChangeOptionsForFileIngestionTest());
|
||||
}
|
||||
|
||||
TEST_P(ExternalSSTFileBasicTest, IngestFileWithFirstByteTampered) {
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
int file_id = 0;
|
||||
EnvOptions env_options;
|
||||
do {
|
||||
Options options = CurrentOptions();
|
||||
std::string file_path = sst_files_dir_ + ToString(file_id++);
|
||||
SstFileWriter sst_file_writer(env_options, options);
|
||||
Status s = sst_file_writer.Open(file_path);
|
||||
ASSERT_OK(s);
|
||||
for (int i = 0; i != 100; ++i) {
|
||||
std::string key = Key(i);
|
||||
std::string value = Key(i) + ToString(0);
|
||||
ASSERT_OK(sst_file_writer.Put(key, value));
|
||||
}
|
||||
ASSERT_OK(sst_file_writer.Finish());
|
||||
{
|
||||
// Get file size
|
||||
uint64_t file_size = 0;
|
||||
ASSERT_OK(env_->GetFileSize(file_path, &file_size));
|
||||
ASSERT_GT(file_size, 8);
|
||||
std::unique_ptr<RandomRWFile> rwfile;
|
||||
ASSERT_OK(env_->NewRandomRWFile(file_path, &rwfile, EnvOptions()));
|
||||
// Manually corrupt the file
|
||||
// We deterministically corrupt the first byte because we currently
|
||||
// cannot choose a random offset. The reason for this limitation is that
|
||||
// we do not checksum property block at present.
|
||||
const uint64_t offset = 0;
|
||||
char scratch[8] = {0};
|
||||
Slice buf;
|
||||
ASSERT_OK(rwfile->Read(offset, sizeof(scratch), &buf, scratch));
|
||||
scratch[0] ^= 0xff; // flip one bit
|
||||
ASSERT_OK(rwfile->Write(offset, buf));
|
||||
}
|
||||
// Ingest file.
|
||||
IngestExternalFileOptions ifo;
|
||||
ifo.write_global_seqno = std::get<0>(GetParam());
|
||||
ifo.verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
s = db_->IngestExternalFile({file_path}, ifo);
|
||||
if (ifo.verify_checksums_before_ingest) {
|
||||
ASSERT_NOK(s);
|
||||
} else {
|
||||
ASSERT_OK(s);
|
||||
}
|
||||
} while (ChangeOptionsForFileIngestionTest());
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest,
|
||||
testing::Bool());
|
||||
testing::Values(std::make_tuple(true, true),
|
||||
std::make_tuple(true, false),
|
||||
std::make_tuple(false, true),
|
||||
std::make_tuple(false, false)));
|
||||
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
|
@ -316,6 +316,13 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
||||
return status;
|
||||
}
|
||||
|
||||
if (ingestion_options_.verify_checksums_before_ingest) {
|
||||
status = table_reader->VerifyChecksum();
|
||||
}
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
|
||||
// Get the external file properties
|
||||
auto props = table_reader->GetTableProperties();
|
||||
const auto& uprops = props->user_collected_properties;
|
||||
|
@ -15,8 +15,9 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
class ExternalSSTFileTest : public DBTestBase,
|
||||
public ::testing::WithParamInterface<bool> {
|
||||
class ExternalSSTFileTest
|
||||
: public DBTestBase,
|
||||
public ::testing::WithParamInterface<std::tuple<bool, bool>> {
|
||||
public:
|
||||
ExternalSSTFileTest() : DBTestBase("/external_sst_file_test") {
|
||||
sst_files_dir_ = dbname_ + "/sst_files/";
|
||||
@ -32,7 +33,7 @@ class ExternalSSTFileTest : public DBTestBase,
|
||||
const Options options,
|
||||
std::vector<std::pair<std::string, std::string>> data, int file_id = -1,
|
||||
bool allow_global_seqno = false, bool write_global_seqno = false,
|
||||
bool sort_data = false,
|
||||
bool verify_checksums_before_ingest = true, bool sort_data = false,
|
||||
std::map<std::string, std::string>* true_data = nullptr,
|
||||
ColumnFamilyHandle* cfh = nullptr) {
|
||||
// Generate a file id if not provided
|
||||
@ -76,6 +77,7 @@ class ExternalSSTFileTest : public DBTestBase,
|
||||
IngestExternalFileOptions ifo;
|
||||
ifo.allow_global_seqno = allow_global_seqno;
|
||||
ifo.write_global_seqno = allow_global_seqno ? write_global_seqno : false;
|
||||
ifo.verify_checksums_before_ingest = verify_checksums_before_ingest;
|
||||
if (cfh) {
|
||||
s = db_->IngestExternalFile(cfh, {file_path}, ifo);
|
||||
} else {
|
||||
@ -155,31 +157,32 @@ class ExternalSSTFileTest : public DBTestBase,
|
||||
Status GenerateAndAddExternalFile(
|
||||
const Options options, std::vector<std::pair<int, std::string>> data,
|
||||
int file_id = -1, bool allow_global_seqno = false,
|
||||
bool write_global_seqno = false, bool sort_data = false,
|
||||
bool write_global_seqno = false,
|
||||
bool verify_checksums_before_ingest = true, bool sort_data = false,
|
||||
std::map<std::string, std::string>* true_data = nullptr,
|
||||
ColumnFamilyHandle* cfh = nullptr) {
|
||||
std::vector<std::pair<std::string, std::string>> file_data;
|
||||
for (auto& entry : data) {
|
||||
file_data.emplace_back(Key(entry.first), entry.second);
|
||||
}
|
||||
return GenerateAndAddExternalFile(options, file_data, file_id,
|
||||
allow_global_seqno, write_global_seqno,
|
||||
sort_data, true_data, cfh);
|
||||
return GenerateAndAddExternalFile(
|
||||
options, file_data, file_id, allow_global_seqno, write_global_seqno,
|
||||
verify_checksums_before_ingest, sort_data, true_data, cfh);
|
||||
}
|
||||
|
||||
Status GenerateAndAddExternalFile(
|
||||
const Options options, std::vector<int> keys, int file_id = -1,
|
||||
bool allow_global_seqno = false, bool write_global_seqno = false,
|
||||
bool sort_data = false,
|
||||
bool verify_checksums_before_ingest = true, bool sort_data = false,
|
||||
std::map<std::string, std::string>* true_data = nullptr,
|
||||
ColumnFamilyHandle* cfh = nullptr) {
|
||||
std::vector<std::pair<std::string, std::string>> file_data;
|
||||
for (auto& k : keys) {
|
||||
file_data.emplace_back(Key(k), Key(k) + ToString(file_id));
|
||||
}
|
||||
return GenerateAndAddExternalFile(options, file_data, file_id,
|
||||
allow_global_seqno, write_global_seqno,
|
||||
sort_data, true_data, cfh);
|
||||
return GenerateAndAddExternalFile(
|
||||
options, file_data, file_id, allow_global_seqno, write_global_seqno,
|
||||
verify_checksums_before_ingest, sort_data, true_data, cfh);
|
||||
}
|
||||
|
||||
Status DeprecatedAddFile(const std::vector<std::string>& files,
|
||||
@ -1157,13 +1160,13 @@ TEST_P(ExternalSSTFileTest, PickedLevel) {
|
||||
std::map<std::string, std::string> true_data;
|
||||
|
||||
// File 0 will go to last level (L3)
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, -1, false, false,
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 10}, -1, false, false, true,
|
||||
false, &true_data));
|
||||
EXPECT_EQ(FilesPerLevel(), "0,0,0,1");
|
||||
|
||||
// File 1 will go to level L2 (since it overlap with file 0 in L3)
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, -1, false, false, false,
|
||||
&true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 9}, -1, false, false, true,
|
||||
false, &true_data));
|
||||
EXPECT_EQ(FilesPerLevel(), "0,0,1,1");
|
||||
|
||||
rocksdb::SyncPoint::GetInstance()->LoadDependency({
|
||||
@ -1192,13 +1195,13 @@ TEST_P(ExternalSSTFileTest, PickedLevel) {
|
||||
|
||||
// This file overlaps with file 0 (L3), file 1 (L2) and the
|
||||
// output of compaction going to L1
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, -1, false, false, false,
|
||||
&true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {4, 7}, -1, false, false, true,
|
||||
false, &true_data));
|
||||
EXPECT_EQ(FilesPerLevel(), "5,0,1,1");
|
||||
|
||||
// This file does not overlap with any file or with the running compaction
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false,
|
||||
false, &true_data));
|
||||
true, false, &true_data));
|
||||
EXPECT_EQ(FilesPerLevel(), "5,0,1,2");
|
||||
|
||||
// Hold compaction from finishing
|
||||
@ -1318,7 +1321,7 @@ TEST_F(ExternalSSTFileTest, IngestNonExistingFile) {
|
||||
|
||||
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
|
||||
ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
|
||||
|
||||
|
||||
// After full compaction, there should be only 1 file.
|
||||
std::vector<std::string> files;
|
||||
env_->GetChildren(dbname_, &files);
|
||||
@ -1429,12 +1432,12 @@ TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
|
||||
// This file overlaps with the output of the compaction (going to L3)
|
||||
// so the file will be added to L0 since L3 is the base level
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {31, 32, 33, 34}, -1, false,
|
||||
false, false, &true_data));
|
||||
false, true, false, &true_data));
|
||||
EXPECT_EQ(FilesPerLevel(), "5");
|
||||
|
||||
// This file does not overlap with the current running compactiong
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {9000, 9001}, -1, false, false,
|
||||
false, &true_data));
|
||||
true, false, &true_data));
|
||||
EXPECT_EQ(FilesPerLevel(), "5,0,0,1");
|
||||
|
||||
// Hold compaction from finishing
|
||||
@ -1449,25 +1452,25 @@ TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
|
||||
Reopen(options);
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 15, 19}, -1, false, false,
|
||||
false, &true_data));
|
||||
true, false, &true_data));
|
||||
ASSERT_EQ(FilesPerLevel(), "1,0,0,3");
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1000, 1001, 1002}, -1, false,
|
||||
false, false, &true_data));
|
||||
false, true, false, &true_data));
|
||||
ASSERT_EQ(FilesPerLevel(), "1,0,0,4");
|
||||
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {500, 600, 700}, -1, false,
|
||||
false, false, &true_data));
|
||||
false, true, false, &true_data));
|
||||
ASSERT_EQ(FilesPerLevel(), "1,0,0,5");
|
||||
|
||||
// File 5 overlaps with file 2 (L3 / base level)
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 10}, -1, false, false,
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {2, 10}, -1, false, false, true,
|
||||
false, &true_data));
|
||||
ASSERT_EQ(FilesPerLevel(), "2,0,0,5");
|
||||
|
||||
// File 6 overlaps with file 2 (L3 / base level) and file 5 (L0)
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {3, 9}, -1, false, false, false,
|
||||
&true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {3, 9}, -1, false, false, true,
|
||||
false, &true_data));
|
||||
ASSERT_EQ(FilesPerLevel(), "3,0,0,5");
|
||||
|
||||
// Verify data in files
|
||||
@ -1486,7 +1489,7 @@ TEST_F(ExternalSSTFileTest, PickedLevelDynamic) {
|
||||
|
||||
// File 7 overlaps with file 4 (L3)
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {650, 651, 652}, -1, false,
|
||||
false, false, &true_data));
|
||||
false, true, false, &true_data));
|
||||
ASSERT_EQ(FilesPerLevel(), "5,0,0,5");
|
||||
|
||||
VerifyDBFromMap(true_data, &kcnt, false);
|
||||
@ -1626,7 +1629,8 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoRandomized) {
|
||||
options.level0_slowdown_writes_trigger = 256;
|
||||
options.level0_stop_writes_trigger = 256;
|
||||
|
||||
bool write_global_seqno = GetParam();
|
||||
bool write_global_seqno = std::get<0>(GetParam());
|
||||
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
for (int iter = 0; iter < 2; iter++) {
|
||||
bool write_to_memtable = (iter == 0);
|
||||
DestroyAndReopen(options);
|
||||
@ -1650,9 +1654,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoRandomized) {
|
||||
true_data[entry.first] = entry.second;
|
||||
}
|
||||
} else {
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, random_data, -1, true,
|
||||
write_global_seqno, true,
|
||||
&true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, random_data, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, true, &true_data));
|
||||
}
|
||||
}
|
||||
size_t kcnt = 0;
|
||||
@ -1681,9 +1685,11 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
|
||||
for (int i = 0; i <= 20; i++) {
|
||||
file_data.emplace_back(Key(i), "L4");
|
||||
}
|
||||
bool write_global_seqno = GetParam();
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
|
||||
write_global_seqno, false, &true_data));
|
||||
bool write_global_seqno = std::get<0>(GetParam());
|
||||
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, file_data, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, false, &true_data));
|
||||
|
||||
// This file dont overlap with anything in the DB, will go to L4
|
||||
ASSERT_EQ("0,0,0,0,1", FilesPerLevel());
|
||||
@ -1693,8 +1699,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
|
||||
for (int i = 80; i <= 130; i++) {
|
||||
file_data.emplace_back(Key(i), "L0");
|
||||
}
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
|
||||
write_global_seqno, false, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, file_data, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, false, &true_data));
|
||||
|
||||
// This file overlap with the memtable, so it will flush it and add
|
||||
// it self to L0
|
||||
@ -1705,8 +1712,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
|
||||
for (int i = 30; i <= 50; i++) {
|
||||
file_data.emplace_back(Key(i), "L4");
|
||||
}
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
|
||||
write_global_seqno, false, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, file_data, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, false, &true_data));
|
||||
|
||||
// This file dont overlap with anything in the DB and fit in L4 as well
|
||||
ASSERT_EQ("2,0,0,0,2", FilesPerLevel());
|
||||
@ -1716,8 +1724,9 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoAssignedLevel) {
|
||||
for (int i = 10; i <= 40; i++) {
|
||||
file_data.emplace_back(Key(i), "L3");
|
||||
}
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, file_data, -1, true,
|
||||
write_global_seqno, false, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, file_data, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, false, &true_data));
|
||||
|
||||
// This file overlap with files in L4, we will ingest it in L3
|
||||
ASSERT_EQ("2,0,0,1,2", FilesPerLevel());
|
||||
@ -1740,17 +1749,20 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoMemtableFlush) {
|
||||
&entries_in_memtable);
|
||||
ASSERT_GE(entries_in_memtable, 1);
|
||||
|
||||
bool write_global_seqno = GetParam();
|
||||
bool write_global_seqno = std::get<0>(GetParam());
|
||||
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
// No need for flush
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {90, 100, 110}, -1, true,
|
||||
write_global_seqno, false, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {90, 100, 110}, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, false, &true_data));
|
||||
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
|
||||
&entries_in_memtable);
|
||||
ASSERT_GE(entries_in_memtable, 1);
|
||||
|
||||
// This file will flush the memtable
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {19, 20, 21}, -1, true,
|
||||
write_global_seqno, false, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {19, 20, 21}, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, false, &true_data));
|
||||
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
|
||||
&entries_in_memtable);
|
||||
ASSERT_EQ(entries_in_memtable, 0);
|
||||
@ -1764,15 +1776,17 @@ TEST_P(ExternalSSTFileTest, IngestFileWithGlobalSeqnoMemtableFlush) {
|
||||
ASSERT_GE(entries_in_memtable, 1);
|
||||
|
||||
// No need for flush, this file keys fit between the memtable keys
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {202, 203, 204}, -1, true,
|
||||
write_global_seqno, false, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {202, 203, 204}, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, false, &true_data));
|
||||
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
|
||||
&entries_in_memtable);
|
||||
ASSERT_GE(entries_in_memtable, 1);
|
||||
|
||||
// This file will flush the memtable
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {206, 207}, -1, true, false,
|
||||
write_global_seqno, &true_data));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {206, 207}, -1, true, false, write_global_seqno,
|
||||
verify_checksums_before_ingest, &true_data));
|
||||
db_->GetIntProperty(DB::Properties::kNumEntriesActiveMemTable,
|
||||
&entries_in_memtable);
|
||||
ASSERT_EQ(entries_in_memtable, 0);
|
||||
@ -1790,13 +1804,16 @@ TEST_P(ExternalSSTFileTest, L0SortingIssue) {
|
||||
ASSERT_OK(Put(Key(1), "memtable"));
|
||||
ASSERT_OK(Put(Key(10), "memtable"));
|
||||
|
||||
bool write_global_seqno = GetParam();
|
||||
bool write_global_seqno = std::get<0>(GetParam());
|
||||
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
// No Flush needed, No global seqno needed, Ingest in L1
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {7, 8}, -1, true,
|
||||
write_global_seqno, false));
|
||||
write_global_seqno,
|
||||
verify_checksums_before_ingest, false));
|
||||
// No Flush needed, but need a global seqno, Ingest in L0
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {7, 8}, -1, true,
|
||||
write_global_seqno, false));
|
||||
write_global_seqno,
|
||||
verify_checksums_before_ingest, false));
|
||||
printf("%s\n", FilesPerLevel().c_str());
|
||||
|
||||
// Overwrite what we added using external files
|
||||
@ -2032,11 +2049,12 @@ TEST_P(ExternalSSTFileTest, IngestionListener) {
|
||||
options.listeners.emplace_back(listener);
|
||||
CreateAndReopenWithCF({"koko", "toto"}, options);
|
||||
|
||||
bool write_global_seqno = GetParam();
|
||||
bool write_global_seqno = std::get<0>(GetParam());
|
||||
bool verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
// Ingest into default cf
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2}, -1, true,
|
||||
write_global_seqno, true, nullptr,
|
||||
handles_[0]));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 2}, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, true, nullptr, handles_[0]));
|
||||
ASSERT_EQ(listener->ingested_files.size(), 1);
|
||||
ASSERT_EQ(listener->ingested_files.back().cf_name, "default");
|
||||
ASSERT_EQ(listener->ingested_files.back().global_seqno, 0);
|
||||
@ -2046,9 +2064,9 @@ TEST_P(ExternalSSTFileTest, IngestionListener) {
|
||||
"default");
|
||||
|
||||
// Ingest into cf1
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2}, -1, true,
|
||||
write_global_seqno, true, nullptr,
|
||||
handles_[1]));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 2}, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, true, nullptr, handles_[1]));
|
||||
ASSERT_EQ(listener->ingested_files.size(), 2);
|
||||
ASSERT_EQ(listener->ingested_files.back().cf_name, "koko");
|
||||
ASSERT_EQ(listener->ingested_files.back().global_seqno, 0);
|
||||
@ -2058,9 +2076,9 @@ TEST_P(ExternalSSTFileTest, IngestionListener) {
|
||||
"koko");
|
||||
|
||||
// Ingest into cf2
|
||||
ASSERT_OK(GenerateAndAddExternalFile(options, {1, 2}, -1, true,
|
||||
write_global_seqno, true, nullptr,
|
||||
handles_[2]));
|
||||
ASSERT_OK(GenerateAndAddExternalFile(
|
||||
options, {1, 2}, -1, true, write_global_seqno,
|
||||
verify_checksums_before_ingest, true, nullptr, handles_[2]));
|
||||
ASSERT_EQ(listener->ingested_files.size(), 3);
|
||||
ASSERT_EQ(listener->ingested_files.back().cf_name, "toto");
|
||||
ASSERT_EQ(listener->ingested_files.back().global_seqno, 0);
|
||||
@ -2126,7 +2144,8 @@ TEST_P(ExternalSSTFileTest, IngestBehind) {
|
||||
IngestExternalFileOptions ifo;
|
||||
ifo.allow_global_seqno = true;
|
||||
ifo.ingest_behind = true;
|
||||
ifo.write_global_seqno = GetParam();
|
||||
ifo.write_global_seqno = std::get<0>(GetParam());
|
||||
ifo.verify_checksums_before_ingest = std::get<1>(GetParam());
|
||||
|
||||
// Can't ingest behind since allow_ingest_behind isn't set to true
|
||||
ASSERT_NOK(GenerateAndAddExternalFileIngestBehind(options, ifo,
|
||||
@ -2215,7 +2234,10 @@ TEST_F(ExternalSSTFileTest, SkipBloomFilter) {
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ExternalSSTFileTest, ExternalSSTFileTest,
|
||||
testing::Bool());
|
||||
testing::Values(std::make_tuple(false, false),
|
||||
std::make_tuple(false, true),
|
||||
std::make_tuple(true, false),
|
||||
std::make_tuple(true, true)));
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
|
@ -295,7 +295,7 @@ struct ColumnFamilyOptions : public AdvancedColumnFamilyOptions {
|
||||
std::vector<DbPath> cf_paths;
|
||||
|
||||
// Compaction concurrent thread limiter for the column family.
|
||||
// If non-nullptr, use given concurrent thread limiter to control
|
||||
// If non-nullptr, use given concurrent thread limiter to control
|
||||
// the max outstanding compaction tasks. Limiter can be shared with
|
||||
// multiple column families across db instances.
|
||||
//
|
||||
@ -1344,6 +1344,11 @@ struct IngestExternalFileOptions {
|
||||
// 2. Without writing external SST file, it's possible to do checksum.
|
||||
// We have a plan to set this option to false by default in the future.
|
||||
bool write_global_seqno = true;
|
||||
// Set to true if you would like to verify the checksums of each block of the
|
||||
// external SST file before ingestion.
|
||||
// Warning: setting this to true causes slowdown in file ingestion because
|
||||
// the external SST file has to be read.
|
||||
bool verify_checksums_before_ingest = false;
|
||||
};
|
||||
|
||||
// TraceOptions is used for StartTrace
|
||||
|
@ -634,6 +634,9 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
|
||||
}
|
||||
|
||||
assert(r->status.ok());
|
||||
TEST_SYNC_POINT_CALLBACK(
|
||||
"BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum",
|
||||
static_cast<char*>(trailer));
|
||||
r->status = r->file->Append(Slice(trailer, kBlockTrailerSize));
|
||||
if (r->status.ok()) {
|
||||
r->status = InsertBlockInCache(block_contents, type, handle);
|
||||
|
Loading…
Reference in New Issue
Block a user