Check KeyContext status in MultiGet (#6387)
Summary: Currently, any IO errors and checksum mismatches encountered while reading data blocks are ignored by the batched MultiGet; it only looks at the GetContext state. Fix that.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6387
Test Plan: Add unit tests
Differential Revision: D19799819
Pulled By: anand1976
fbshipit-source-id: 46133dccbb04e64067b9fe6cda73e282203db969
Parent: 4e457278fa
Commit: d70011bccc
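For context, here is a minimal caller-side sketch (not part of this commit; the db handle and key names are hypothetical) of the batched MultiGet() overload this fix affects. Before the fix, an IO error or checksum mismatch while reading a data block could be swallowed, so the lookup kept going and a key's entry in the status array could read OK alongside a stale value; after the fix, the error lands in that per-key Status:

    #include <vector>
    #include "rocksdb/db.h"

    // Issue a batched MultiGet() and trust a value only when its per-key
    // Status is OK. With this fix, data block read errors surface here.
    void CheckedMultiGet(rocksdb::DB* db) {
      std::vector<rocksdb::Slice> keys{"key0", "key50"};  // hypothetical keys
      std::vector<rocksdb::PinnableSlice> values(keys.size());
      std::vector<rocksdb::Status> statuses(keys.size());

      db->MultiGet(rocksdb::ReadOptions(), db->DefaultColumnFamily(),
                   keys.size(), keys.data(), values.data(), statuses.data(),
                   /*sorted_input=*/true);

      for (size_t i = 0; i < keys.size(); ++i) {
        if (statuses[i].ok()) {
          // Safe to consume values[i].
        } else if (statuses[i].IsNotFound()) {
          // Key absent; values[i] is empty.
        } else {
          // IOError / Corruption: do not treat values[i] as valid.
        }
      }
    }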
@@ -9,6 +9,7 @@
 * Fixed issue #6316 that can cause a corruption of the MANIFEST file in the middle when writing to it fails due to no disk space.
 * Add DBOptions::skip_checking_sst_file_sizes_on_db_open. It disables potentially expensive checking of all sst file sizes in DB::Open().
 * BlobDB now ignores trivially moved files when updating the mapping between blob files and SSTs. This should mitigate issue #6338 where out of order flush/compaction notifications could trigger an assertion with the earlier code.
+* Batched MultiGet() ignores IO errors while reading data blocks, causing it to potentially continue looking for a key and returning stale results.

 ### Performance Improvements
 * Perform readahead when reading from option files. Inside DB, options.log_readahead_size will be used as the readahead size. In other cases, a default 512KB is used.
@@ -2011,6 +2011,90 @@ TEST_P(DBBasicTestWithParallelIO, MultiGet) {
   }
 }
 
+TEST_P(DBBasicTestWithParallelIO, MultiGetWithChecksumMismatch) {
+  std::vector<std::string> key_data(10);
+  std::vector<Slice> keys;
+  // We cannot resize a PinnableSlice vector, so just set initial size to
+  // largest we think we will need
+  std::vector<PinnableSlice> values(10);
+  std::vector<Status> statuses;
+  int read_count = 0;
+  ReadOptions ro;
+  ro.fill_cache = fill_cache();
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "RetrieveMultipleBlocks:VerifyChecksum", [&](void* status) {
+        Status* s = static_cast<Status*>(status);
+        read_count++;
+        if (read_count == 2) {
+          *s = Status::Corruption();
+        }
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  // Warm up the cache first
+  key_data.emplace_back(Key(0));
+  keys.emplace_back(Slice(key_data.back()));
+  key_data.emplace_back(Key(50));
+  keys.emplace_back(Slice(key_data.back()));
+  statuses.resize(keys.size());
+
+  dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(),
+                     keys.data(), values.data(), statuses.data(), true);
+  ASSERT_TRUE(CheckValue(0, values[0].ToString()));
+  // ASSERT_TRUE(CheckValue(50, values[1].ToString()));
+  ASSERT_EQ(statuses[0], Status::OK());
+  ASSERT_EQ(statuses[1], Status::Corruption());
+
+  SyncPoint::GetInstance()->DisableProcessing();
+}
+
+TEST_P(DBBasicTestWithParallelIO, MultiGetWithMissingFile) {
+  std::vector<std::string> key_data(10);
+  std::vector<Slice> keys;
+  // We cannot resize a PinnableSlice vector, so just set initial size to
+  // largest we think we will need
+  std::vector<PinnableSlice> values(10);
+  std::vector<Status> statuses;
+  ReadOptions ro;
+  ro.fill_cache = fill_cache();
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "TableCache::MultiGet:FindTable", [&](void* status) {
+        Status* s = static_cast<Status*>(status);
+        *s = Status::IOError();
+      });
+  // DB open will create table readers unless we reduce the table cache
+  // capacity.
+  // SanitizeOptions will set max_open_files to minimum of 20. Table cache
+  // is allocated with max_open_files - 10 as capacity. So override
+  // max_open_files to 11 so table cache capacity will become 1. This will
+  // prevent file open during DB open and force the file to be opened
+  // during MultiGet
+  SyncPoint::GetInstance()->SetCallBack(
+      "SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
+        int* max_open_files = (int*)arg;
+        *max_open_files = 11;
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  Reopen(CurrentOptions());
+
+  // Warm up the cache first
+  key_data.emplace_back(Key(0));
+  keys.emplace_back(Slice(key_data.back()));
+  key_data.emplace_back(Key(50));
+  keys.emplace_back(Slice(key_data.back()));
+  statuses.resize(keys.size());
+
+  dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(),
+                     keys.data(), values.data(), statuses.data(), true);
+  ASSERT_EQ(statuses[0], Status::IOError());
+  ASSERT_EQ(statuses[1], Status::IOError());
+
+  SyncPoint::GetInstance()->DisableProcessing();
+}
+
 INSTANTIATE_TEST_CASE_P(
     ParallelIO, DBBasicTestWithParallelIO,
     // Params are as follows -
@@ -490,6 +490,7 @@ Status TableCache::MultiGet(const ReadOptions& options,
         file_options_, internal_comparator, fd, &handle, prefix_extractor,
         options.read_tier == kBlockCacheTier /* no_io */,
         true /* record_read_stats */, file_read_hist, skip_filters, level);
+    TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
     if (s.ok()) {
       t = GetTableReaderFromHandle(handle);
       assert(t);
@@ -1965,6 +1965,11 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
     for (auto iter = file_range.begin(); iter != file_range.end(); ++iter) {
       GetContext& get_context = *iter->get_context;
       Status* status = iter->s;
+      // The Status in the KeyContext takes precedence over GetContext state
+      if (!status->ok()) {
+        file_range.MarkKeyDone(iter);
+        continue;
+      }
 
       if (get_context.sample()) {
         sample_file_read_inc(f->file_metadata);
@@ -2458,6 +2458,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
       s = rocksdb::VerifyChecksum(footer.checksum(),
                                   req.result.data() + req_offset,
                                   handle.size() + 1, expected);
+      TEST_SYNC_POINT_CALLBACK("RetrieveMultipleBlocks:VerifyChecksum", &s);
     }
   }
 