Fb 9718 verify checksums is ignored (#9767)

Summary:
Fixes https://github.com/facebook/rocksdb/issues/9718

The verify_checksums flag of read_options should be passed to the read options used by the BlockFetcher in a couple of cases where it is not at present. It will now happen (but did not, previously) on iteration and on [multi]get, where a fetcher is created as part of the iterate/get call.

This may result in much better performance in a few workloads where the client chooses to remove verification.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9767

Reviewed By: mrambacher

Differential Revision: D35218986

Pulled By: jay-zhuang

fbshipit-source-id: 329d29764bb70fbc7f2673440bc46c107a813bc8
This commit is contained in:
Alan Paxton 2022-03-29 11:54:54 -07:00 committed by Facebook GitHub Bot
parent a5e5130556
commit b6ad0d958f
11 changed files with 233 additions and 7 deletions

View File

@ -431,6 +431,8 @@ enum Tickers : uint32_t {
NON_LAST_LEVEL_READ_BYTES, NON_LAST_LEVEL_READ_BYTES,
NON_LAST_LEVEL_READ_COUNT, NON_LAST_LEVEL_READ_COUNT,
BLOCK_CHECKSUM_COMPUTE_COUNT,
TICKER_ENUM_MAX TICKER_ENUM_MAX
}; };

View File

@ -187,6 +187,7 @@ JAVA_TESTS = \
org.rocksdb.TtlDBTest\ org.rocksdb.TtlDBTest\
org.rocksdb.StatisticsTest\ org.rocksdb.StatisticsTest\
org.rocksdb.StatisticsCollectorTest\ org.rocksdb.StatisticsCollectorTest\
org.rocksdb.VerifyChecksumsTest\
org.rocksdb.WalFilterTest\ org.rocksdb.WalFilterTest\
org.rocksdb.WALRecoveryModeTest\ org.rocksdb.WALRecoveryModeTest\
org.rocksdb.WriteBatchHandlerTest\ org.rocksdb.WriteBatchHandlerTest\

View File

@ -5086,6 +5086,8 @@ class TickerTypeJni {
return -0x2C; return -0x2C;
case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT: case ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT:
return -0x2D; return -0x2D;
case ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT:
return -0x2E;
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX: case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
// 0x5F was the max value in the initial copy of tickers to Java. // 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep // Since these values are exposed directly to Java clients, we keep
@ -5455,6 +5457,8 @@ class TickerTypeJni {
return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES; return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_BYTES;
case -0x2D: case -0x2D:
return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT; return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT;
case -0x2E:
return ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT;
case 0x5F: case 0x5F:
// 0x5F was the max value in the initial copy of tickers to Java. // 0x5F was the max value in the initial copy of tickers to Java.
// Since these values are exposed directly to Java clients, we keep // Since these values are exposed directly to Java clients, we keep

View File

@ -804,6 +804,8 @@ public enum TickerType {
NON_LAST_LEVEL_READ_BYTES((byte) -0x2C), NON_LAST_LEVEL_READ_BYTES((byte) -0x2C),
NON_LAST_LEVEL_READ_COUNT((byte) -0x2D), NON_LAST_LEVEL_READ_COUNT((byte) -0x2D),
BLOCK_CHECKSUM_COMPUTE_COUNT((byte) -0x2E),
TICKER_ENUM_MAX((byte) 0x5F); TICKER_ENUM_MAX((byte) 0x5F);
private final byte value; private final byte value;

View File

@ -0,0 +1,213 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
package org.rocksdb;
import static org.assertj.core.api.Assertions.assertThat;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
public class VerifyChecksumsTest {
@ClassRule
public static final RocksNativeLibraryResource ROCKS_NATIVE_LIBRARY_RESOURCE =
new RocksNativeLibraryResource();
@Rule public TemporaryFolder dbFolder = new TemporaryFolder();
/**
* Class to factor out the specific DB operations within the test
*/
abstract static class Operations {
final int kv_count;
final List<String> elements = new ArrayList<>();
final List<String> sortedElements = new ArrayList<>();
Operations(final int kv_count) {
this.kv_count = kv_count;
for (int i = 0; i < kv_count; i++) elements.add(MessageFormat.format("{0,number,#}", i));
sortedElements.addAll(elements);
Collections.sort(sortedElements);
}
void fill(final RocksDB db) throws RocksDBException {
for (int i = 0; i < kv_count; i++) {
final String key = MessageFormat.format("key{0}", elements.get(i));
final String value = MessageFormat.format("value{0}", elements.get(i));
// noinspection ObjectAllocationInLoop
db.put(key.getBytes(), value.getBytes());
}
db.flush(new FlushOptions());
}
@SuppressWarnings("ObjectAllocationInLoop")
void get(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
try (final ReadOptions readOptions = new ReadOptions()) {
readOptions.setReadaheadSize(32 * 1024);
readOptions.setFillCache(false);
readOptions.setVerifyChecksums(verifyFlag);
for (int i = 0; i < kv_count / 10; i++) {
@SuppressWarnings("UnsecureRandomNumberGeneration")
final int index = Double.valueOf(Math.random() * kv_count).intValue();
final String key = MessageFormat.format("key{0}", sortedElements.get(index));
final String expectedValue = MessageFormat.format("value{0}", sortedElements.get(index));
final byte[] value = db.get(readOptions, key.getBytes());
assertThat(value).isEqualTo(expectedValue.getBytes());
}
}
}
@SuppressWarnings("ObjectAllocationInLoop")
void multiGet(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
try (final ReadOptions readOptions = new ReadOptions()) {
readOptions.setReadaheadSize(32 * 1024);
readOptions.setFillCache(false);
readOptions.setVerifyChecksums(verifyFlag);
final List<byte[]> keys = new ArrayList<>();
final List<String> expectedValues = new ArrayList<>();
for (int i = 0; i < kv_count / 10; i++) {
@SuppressWarnings("UnsecureRandomNumberGeneration")
final int index = Double.valueOf(Math.random() * kv_count).intValue();
keys.add(MessageFormat.format("key{0}", sortedElements.get(index)).getBytes());
expectedValues.add(MessageFormat.format("value{0}", sortedElements.get(index)));
}
final List<byte[]> values = db.multiGetAsList(readOptions, keys);
for (int i = 0; i < keys.size(); i++) {
assertThat(values.get(i)).isEqualTo(expectedValues.get(i).getBytes());
}
}
}
void iterate(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
final ReadOptions readOptions = new ReadOptions();
readOptions.setReadaheadSize(32 * 1024);
readOptions.setFillCache(false);
readOptions.setVerifyChecksums(verifyFlag);
int i = 0;
try (final RocksIterator rocksIterator = db.newIterator(readOptions)) {
rocksIterator.seekToFirst();
rocksIterator.status();
while (rocksIterator.isValid()) {
final byte[] key = rocksIterator.key();
final byte[] value = rocksIterator.value();
// noinspection ObjectAllocationInLoop
assertThat(key).isEqualTo(
(MessageFormat.format("key{0}", sortedElements.get(i))).getBytes());
// noinspection ObjectAllocationInLoop
assertThat(value).isEqualTo(
(MessageFormat.format("value{0}", sortedElements.get(i))).getBytes());
rocksIterator.next();
rocksIterator.status();
i++;
}
}
assertThat(i).isEqualTo(kv_count);
}
abstract void performOperations(final RocksDB db, final boolean verifyFlag)
throws RocksDBException;
}
private static final int KV_COUNT = 10000;
/**
* Run some operations and count the TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT before and after
* It should GO UP when the read options have checksum verification turned on.
* It shoulld REMAIN UNCHANGED when the read options have checksum verification turned off.
* As the read options refer only to the read operations, there are still a few checksums
* performed outside this (blocks are getting loaded for lots of reasons, not aways directly due
* to reads) but this test provides a good enough proxy for whether the flag is being noticed.
*
* @param operations the DB reading operations to perform which affect the checksum stats
*
* @throws RocksDBException
*/
private void verifyChecksums(final Operations operations) throws RocksDBException {
final String dbPath = dbFolder.getRoot().getAbsolutePath();
// noinspection SingleStatementInBlock
try (final Statistics statistics = new Statistics();
final Options options = new Options().setCreateIfMissing(true).setStatistics(statistics)) {
try (final RocksDB db = RocksDB.open(options, dbPath)) {
// 0
System.out.println(MessageFormat.format(
"newly open {0}", statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT)));
operations.fill(db);
//
System.out.println(MessageFormat.format(
"flushed {0}", statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT)));
}
// 2
System.out.println(MessageFormat.format("closed-after-write {0}",
statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT)));
for (final boolean verifyFlag : new boolean[] {false, true, false, true}) {
try (final RocksDB db = RocksDB.open(options, dbPath)) {
final long beforeOperationsCount =
statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT);
System.out.println(MessageFormat.format("re-opened {0}", beforeOperationsCount));
operations.performOperations(db, verifyFlag);
final long afterOperationsCount =
statistics.getTickerCount(TickerType.BLOCK_CHECKSUM_COMPUTE_COUNT);
if (verifyFlag) {
// We don't need to be exact - we are checking that the checksums happen
// exactly how many depends on block size etc etc, so may not be entirely stable
System.out.println(MessageFormat.format("verify=true {0}", afterOperationsCount));
assertThat(afterOperationsCount).isGreaterThan(beforeOperationsCount + 20);
} else {
System.out.println(MessageFormat.format("verify=false {0}", afterOperationsCount));
assertThat(afterOperationsCount).isEqualTo(beforeOperationsCount);
}
}
}
}
}
@Test
public void verifyChecksumsInIteration() throws RocksDBException {
// noinspection AnonymousInnerClassMayBeStatic
verifyChecksums(new Operations(KV_COUNT) {
@Override
void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
iterate(db, verifyFlag);
}
});
}
@Test
public void verifyChecksumsGet() throws RocksDBException {
// noinspection AnonymousInnerClassMayBeStatic
verifyChecksums(new Operations(KV_COUNT) {
@Override
void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
get(db, verifyFlag);
}
});
}
@Test
public void verifyChecksumsMultiGet() throws RocksDBException {
// noinspection AnonymousInnerClassMayBeStatic
verifyChecksums(new Operations(KV_COUNT) {
@Override
void performOperations(final RocksDB db, final boolean verifyFlag) throws RocksDBException {
multiGet(db, verifyFlag);
}
});
}
}

View File

@ -226,7 +226,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"}, {LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"},
{NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"}, {NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"},
{NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"}, {NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"},
}; {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"}};
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = { const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
{DB_GET, "rocksdb.db.get.micros"}, {DB_GET, "rocksdb.db.get.micros"},

View File

@ -2597,6 +2597,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
uncompression_dict_status = uncompression_dict_status =
rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
nullptr /* prefetch_buffer */, no_io, nullptr /* prefetch_buffer */, no_io,
read_options.verify_checksums,
sst_file_range.begin()->get_context, &lookup_context, sst_file_range.begin()->get_context, &lookup_context,
&uncompression_dict); &uncompression_dict);
uncompression_dict_inited = true; uncompression_dict_inited = true;
@ -3442,6 +3443,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
CachableEntry<UncompressionDict> uncompression_dict; CachableEntry<UncompressionDict> uncompression_dict;
s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
nullptr /* prefetch_buffer */, false /* no_io */, nullptr /* prefetch_buffer */, false /* no_io */,
false, /* verify_checksums */
nullptr /* get_context */, nullptr /* lookup_context */, nullptr /* get_context */, nullptr /* lookup_context */,
&uncompression_dict); &uncompression_dict);
if (!s.ok()) { if (!s.ok()) {

View File

@ -39,8 +39,8 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
if (rep_->uncompression_dict_reader) { if (rep_->uncompression_dict_reader) {
const bool no_io = (ro.read_tier == kBlockCacheTier); const bool no_io = (ro.read_tier == kBlockCacheTier);
s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary( s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
prefetch_buffer, no_io, get_context, lookup_context, prefetch_buffer, no_io, ro.verify_checksums, get_context,
&uncompression_dict); lookup_context, &uncompression_dict);
if (!s.ok()) { if (!s.ok()) {
iter->Invalidate(s); iter->Invalidate(s);
return iter; return iter;

View File

@ -76,8 +76,8 @@ Status UncompressionDictReader::ReadUncompressionDictionary(
} }
Status UncompressionDictReader::GetOrReadUncompressionDictionary( Status UncompressionDictReader::GetOrReadUncompressionDictionary(
FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context, FilePrefetchBuffer* prefetch_buffer, bool no_io, bool verify_checksums,
BlockCacheLookupContext* lookup_context, GetContext* get_context, BlockCacheLookupContext* lookup_context,
CachableEntry<UncompressionDict>* uncompression_dict) const { CachableEntry<UncompressionDict>* uncompression_dict) const {
assert(uncompression_dict); assert(uncompression_dict);
@ -90,6 +90,7 @@ Status UncompressionDictReader::GetOrReadUncompressionDictionary(
if (no_io) { if (no_io) {
read_options.read_tier = kBlockCacheTier; read_options.read_tier = kBlockCacheTier;
} }
read_options.verify_checksums = verify_checksums;
return ReadUncompressionDictionary(table_, prefetch_buffer, read_options, return ReadUncompressionDictionary(table_, prefetch_buffer, read_options,
cache_dictionary_blocks(), get_context, cache_dictionary_blocks(), get_context,

View File

@ -31,8 +31,8 @@ class UncompressionDictReader {
std::unique_ptr<UncompressionDictReader>* uncompression_dict_reader); std::unique_ptr<UncompressionDictReader>* uncompression_dict_reader);
Status GetOrReadUncompressionDictionary( Status GetOrReadUncompressionDictionary(
FilePrefetchBuffer* prefetch_buffer, bool no_io, GetContext* get_context, FilePrefetchBuffer* prefetch_buffer, bool no_io, bool verify_checksums,
BlockCacheLookupContext* lookup_context, GetContext* get_context, BlockCacheLookupContext* lookup_context,
CachableEntry<UncompressionDict>* uncompression_dict) const; CachableEntry<UncompressionDict>* uncompression_dict) const;
size_t ApproximateMemoryUsage() const; size_t ApproximateMemoryUsage() const;

View File

@ -35,6 +35,7 @@ inline void BlockFetcher::ProcessTrailerIfPresent() {
io_status_ = status_to_io_status(VerifyBlockChecksum( io_status_ = status_to_io_status(VerifyBlockChecksum(
footer_.checksum_type(), slice_.data(), block_size_, footer_.checksum_type(), slice_.data(), block_size_,
file_->file_name(), handle_.offset())); file_->file_name(), handle_.offset()));
RecordTick(ioptions_.stats, BLOCK_CHECKSUM_COMPUTE_COUNT);
} }
compression_type_ = compression_type_ =
BlockBasedTable::GetBlockCompressionType(slice_.data(), block_size_); BlockBasedTable::GetBlockCompressionType(slice_.data(), block_size_);