249eff0f30
Summary: Since read threads do not coordinate on loading data into block cache, two threads between Lookup and Insert can end up loading and inserting the same data. This is particularly concerning with cache_index_and_filter_blocks since those are hot and more likely to be race targets if ejected from (or not pre-populated in) the cache.

Particularly with moves toward disaggregated / network storage, the cost of redundant retrieval might be high, and we should at least have some hard statistics from which we can estimate impact.

Example with full filter thrashing "cliff":

$ ./db_bench --benchmarks=fillrandom --num=15000000 --cache_index_and_filter_blocks -bloom_bits=10
...
$ ./db_bench --db=/tmp/rocksdbtest-172704/dbbench --use_existing_db --benchmarks=readrandom,stats --num=200000 --cache_index_and_filter_blocks --cache_size=$((130 * 1024 * 1024)) --bloom_bits=10 --threads=16 -statistics 2>&1 | egrep '^rocksdb.block.cache.(.*add|.*redundant)' | grep -v compress | sort
rocksdb.block.cache.add COUNT : 14181
rocksdb.block.cache.add.failures COUNT : 0
rocksdb.block.cache.add.redundant COUNT : 476
rocksdb.block.cache.data.add COUNT : 12749
rocksdb.block.cache.data.add.redundant COUNT : 18
rocksdb.block.cache.filter.add COUNT : 1003
rocksdb.block.cache.filter.add.redundant COUNT : 217
rocksdb.block.cache.index.add COUNT : 429
rocksdb.block.cache.index.add.redundant COUNT : 241
$ ./db_bench --db=/tmp/rocksdbtest-172704/dbbench --use_existing_db --benchmarks=readrandom,stats --num=200000 --cache_index_and_filter_blocks --cache_size=$((120 * 1024 * 1024)) --bloom_bits=10 --threads=16 -statistics 2>&1 | egrep '^rocksdb.block.cache.(.*add|.*redundant)' | grep -v compress | sort
rocksdb.block.cache.add COUNT : 1182223
rocksdb.block.cache.add.failures COUNT : 0
rocksdb.block.cache.add.redundant COUNT : 302728
rocksdb.block.cache.data.add COUNT : 31425
rocksdb.block.cache.data.add.redundant COUNT : 12
rocksdb.block.cache.filter.add COUNT : 795455
rocksdb.block.cache.filter.add.redundant COUNT : 130238
rocksdb.block.cache.index.add COUNT : 355343
rocksdb.block.cache.index.add.redundant COUNT : 172478

Pull Request resolved: https://github.com/facebook/rocksdb/pull/6681
Test Plan: Some manual testing (above) and unit test covering key metrics is included
Reviewed By: ltamasi
Differential Revision: D21134113
Pulled By: pdillinger
fbshipit-source-id: c11497b5f00f4ffdfe919823904e52d0a1a91d87 |
||
---|---|---|
.. | ||
utilities | ||
advanced_options.h | ||
c.h | ||
cache.h | ||
cleanable.h | ||
compaction_filter.h | ||
compaction_job_stats.h | ||
comparator.h | ||
concurrent_task_limiter.h | ||
convenience.h | ||
db_bench_tool.h | ||
db_dump_tool.h | ||
db_stress_tool.h | ||
db.h | ||
env_encryption.h | ||
env.h | ||
experimental.h | ||
file_checksum.h | ||
file_system.h | ||
filter_policy.h | ||
flush_block_policy.h | ||
io_status.h | ||
iostats_context.h | ||
iterator.h | ||
ldb_tool.h | ||
listener.h | ||
memory_allocator.h | ||
memtablerep.h | ||
merge_operator.h | ||
metadata.h | ||
options.h | ||
perf_context.h | ||
perf_level.h | ||
persistent_cache.h | ||
rate_limiter.h | ||
rocksdb_namespace.h | ||
slice_transform.h | ||
slice.h | ||
snapshot.h | ||
sst_dump_tool.h | ||
sst_file_manager.h | ||
sst_file_reader.h | ||
sst_file_writer.h | ||
statistics.h | ||
stats_history.h | ||
status.h | ||
table_properties.h | ||
table.h | ||
thread_status.h | ||
threadpool.h | ||
trace_reader_writer.h | ||
transaction_log.h | ||
types.h | ||
universal_compaction.h | ||
version.h | ||
wal_filter.h | ||
write_batch_base.h | ||
write_batch.h | ||
write_buffer_manager.h |