Merge branch 'facebook:main' into checkpoint_1

This commit is contained in:
gitbw95 2022-04-22 16:49:39 -07:00 committed by GitHub
commit c9f2695977
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
184 changed files with 11880 additions and 16708 deletions

View File

@ -113,6 +113,12 @@ commands:
cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_GTEST_TESTS=0
ninja && sudo ninja install
install-valgrind:
steps:
- run:
name: Install valgrind
command: sudo apt-get update -y && sudo apt-get install -y valgrind
upgrade-cmake:
steps:
- run:
@ -224,14 +230,16 @@ jobs:
- run: make V=1 J=32 -j32 check
- post-steps
build-linux-encrypted-env:
build-linux-encrypted_env-no_compression:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: ENCRYPTED_ENV=1 make V=1 J=32 -j32 check
- run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=32 -j32 check
- run: |
./sst_dump --help | egrep -q 'Supported compression types: kNoCompression$' # Verify no compiled in compression
- post-steps
build-linux-shared_lib-alt_namespace-status_checked:
@ -312,7 +320,7 @@ jobs:
- pre-steps
- install-gflags
- install-clang-10
- run: ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out
- run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-clang10-mini-tsan:
@ -356,6 +364,17 @@ jobs:
- run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-valgrind:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-valgrind
- run: PORTABLE=1 make V=1 -j32 valgrind_test
- post-steps
build-linux-clang10-clang-analyze:
machine:
image: ubuntu-2004:202111-02
@ -368,7 +387,7 @@ jobs:
- run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze # aligned new doesn't work for reason we haven't figured out. For unknown, reason passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path.
- post-steps
build-linux-cmake:
build-linux-cmake-with-folly:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
@ -376,10 +395,11 @@ jobs:
- pre-steps
- install-gflags
- upgrade-cmake
- run: (mkdir build && cd build && cmake -DWITH_GFLAGS=1 .. && make V=1 -j20 && ctest -j20)
- run: make checkout_folly
- run: (mkdir build && cd build && cmake -DUSE_FOLLY=1 -DWITH_GFLAGS=1 .. && make V=1 -j20 && ctest -j20)
- post-steps
build-linux-cmake-ubuntu-20:
build-linux-cmake-with-benchmark:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
@ -401,14 +421,15 @@ jobs:
- run: make V=1 -j8 -k check-headers # could be moved to a different build
- post-steps
build-linux-gcc-7:
build-linux-gcc-7-with-folly:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-7 g++-7 libgflags-dev
- run: CC=gcc-7 CXX=g++-7 V=1 make -j32 check
- run: make checkout_folly
- run: USE_FOLLY=1 CC=gcc-7 CXX=g++-7 V=1 make -j32 check
- post-steps
build-linux-gcc-8-no_test_run:
@ -453,6 +474,19 @@ jobs:
- run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j16 all microbench
- post-steps
# Ensure ASAN+UBSAN with folly, and full testsuite with clang 13
build-linux-clang-13-asan-ubsan-with-folly:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-clang-13
- install-gflags
- run: make checkout_folly
- run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j32 check
- post-steps
# This job is only to make sure the microbench tests are able to run, the benchmark result is not meaningful as the CI host is changing.
build-linux-run-microbench:
machine:
@ -799,11 +833,11 @@ workflows:
- build-linux
build-linux-cmake:
jobs:
- build-linux-cmake
- build-linux-cmake-ubuntu-20
build-linux-encrypted-env:
- build-linux-cmake-with-folly
- build-linux-cmake-with-benchmark
build-linux-encrypted_env-no_compression:
jobs:
- build-linux-encrypted-env
- build-linux-encrypted_env-no_compression
build-linux-shared_lib-alt_namespace-status_checked:
jobs:
- build-linux-shared_lib-alt_namespace-status_checked
@ -871,7 +905,7 @@ workflows:
jobs:
- build-linux-clang-no_test_run
- build-linux-clang-13-no_test_run
- build-linux-gcc-7
- build-linux-gcc-7-with-folly
- build-linux-gcc-8-no_test_run
- build-linux-gcc-10-cxx20-no_test_run
- build-linux-gcc-11-no_test_run
@ -905,3 +939,5 @@ workflows:
- build-linux-arm-test-full
- build-linux-run-microbench
- build-linux-non-shm
- build-linux-clang-13-asan-ubsan-with-folly
- build-linux-valgrind

1
.gitignore vendored
View File

@ -95,3 +95,4 @@ fuzz/proto/gen/
fuzz/crash-*
cmake-build-*
third-party/folly/

View File

@ -78,19 +78,6 @@ if ($ENV{CIRCLECI})
add_definitions(-DCIRCLECI)
endif()
# third-party/folly is only validated to work on Linux and Windows for now.
# So only turn it on there by default.
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows")
if(MSVC AND MSVC_VERSION LESS 1910)
# Folly does not compile with MSVC older than VS2017
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
else()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON)
endif()
else()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
endif()
if( NOT DEFINED CMAKE_CXX_STANDARD )
set(CMAKE_CXX_STANDARD 17)
endif()
@ -182,26 +169,6 @@ else()
endif()
endif()
string(TIMESTAMP TS "%Y-%m-%d %H:%M:%S" UTC)
set(BUILD_DATE "${TS}" CACHE STRING "the time we first built rocksdb")
find_package(Git)
if(GIT_FOUND AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_SHA COMMAND "${GIT_EXECUTABLE}" rev-parse HEAD )
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" RESULT_VARIABLE GIT_MOD COMMAND "${GIT_EXECUTABLE}" diff-index HEAD --quiet)
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_DATE COMMAND "${GIT_EXECUTABLE}" log -1 --date=format:"%Y-%m-%d %T" --format="%ad")
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_TAG RESULT_VARIABLE rv COMMAND "${GIT_EXECUTABLE}" symbolic-ref -q --short HEAD OUTPUT_STRIP_TRAILING_WHITESPACE)
if (rv AND NOT rv EQUAL 0)
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_TAG COMMAND "${GIT_EXECUTABLE}" describe --tags --exact-match OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
else()
set(GIT_SHA 0)
set(GIT_MOD 1)
endif()
string(REGEX REPLACE "[^0-9a-fA-F]+" "" GIT_SHA "${GIT_SHA}")
string(REGEX REPLACE "[^0-9: /-]+" "" GIT_DATE "${GIT_DATE}")
option(WITH_MD_LIBRARY "build with MD" ON)
if(WIN32 AND MSVC)
if(WITH_MD_LIBRARY)
@ -211,9 +178,6 @@ if(WIN32 AND MSVC)
endif()
endif()
set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc)
configure_file(util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY)
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W4 /wd4127 /wd4800 /wd4996 /wd4351 /wd4100 /wd4204 /wd4324")
@ -456,23 +420,11 @@ if (ASSERT_STATUS_CHECKED)
add_definitions(-DROCKSDB_ASSERT_STATUS_CHECKED)
endif()
if(DEFINED USE_RTTI)
if(USE_RTTI)
message(STATUS "Enabling RTTI")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DROCKSDB_USE_RTTI")
else()
if(MSVC)
message(STATUS "Disabling RTTI in Release builds. Always on in Debug.")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GR-")
else()
message(STATUS "Disabling RTTI in Release builds")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-rtti")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-rtti")
endif()
endif()
else()
# RTTI is by default AUTO which enables it in debug and disables it in release.
set(USE_RTTI AUTO CACHE STRING "Enable RTTI in builds")
set_property(CACHE USE_RTTI PROPERTY STRINGS AUTO ON OFF)
if(USE_RTTI STREQUAL "AUTO")
message(STATUS "Enabling RTTI in Debug builds only (default)")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI")
if(MSVC)
@ -480,6 +432,20 @@ else()
else()
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-rtti")
endif()
elseif(USE_RTTI)
message(STATUS "Enabling RTTI in all builds")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DROCKSDB_USE_RTTI")
else()
if(MSVC)
message(STATUS "Disabling RTTI in Release builds. Always on in Debug.")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DROCKSDB_USE_RTTI")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /GR-")
else()
message(STATUS "Disabling RTTI in all builds")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-rtti")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fno-rtti")
endif()
endif()
# Used to run CI build and tests so we can run faster
@ -615,8 +581,9 @@ endif()
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${PROJECT_SOURCE_DIR}/include)
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
if(USE_FOLLY)
include_directories(${PROJECT_SOURCE_DIR}/third-party/folly)
add_definitions(-DUSE_FOLLY -DFOLLY_NO_CONFIG)
endif()
find_package(Threads REQUIRED)
@ -628,8 +595,8 @@ set(SOURCES
cache/cache_key.cc
cache/cache_reservation_manager.cc
cache/clock_cache.cc
cache/compressed_secondary_cache.cc
cache/lru_cache.cc
cache/lru_secondary_cache.cc
cache/sharded_cache.cc
db/arena_wrapped_db_iter.cc
db/blob/blob_fetcher.cc
@ -849,6 +816,7 @@ set(SOURCES
util/thread_local.cc
util/threadpool_imp.cc
util/xxhash.cc
utilities/agg_merge/agg_merge.cc
utilities/backup/backup_engine.cc
utilities/blob_db/blob_compaction_filter.cc
utilities/blob_db/blob_db.cc
@ -998,13 +966,12 @@ else()
env/io_posix.cc)
endif()
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
if(USE_FOLLY)
list(APPEND SOURCES
third-party/folly/folly/detail/Futex.cpp
third-party/folly/folly/synchronization/AtomicNotification.cpp
third-party/folly/folly/synchronization/DistributedMutex.cpp
third-party/folly/folly/synchronization/ParkingLot.cpp
third-party/folly/folly/synchronization/WaitOptions.cpp)
third-party/folly/folly/container/detail/F14Table.cpp
third-party/folly/folly/lang/SafeAssert.cpp
third-party/folly/folly/lang/ToAscii.cpp
third-party/folly/folly/ScopeGuard.cpp)
endif()
set(ROCKSDB_STATIC_LIB rocksdb${ARTIFACT_SUFFIX})
@ -1019,6 +986,60 @@ else()
set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT})
endif()
set(ROCKSDB_PLUGIN_EXTERNS "")
set(ROCKSDB_PLUGIN_BUILTINS "")
message(STATUS "ROCKSDB PLUGINS TO BUILD ${ROCKSDB_PLUGINS}")
list(APPEND PLUGINS ${ROCKSDB_PLUGINS})
foreach(PLUGIN IN LISTS PLUGINS)
set(PLUGIN_ROOT "${CMAKE_SOURCE_DIR}/plugin/${PLUGIN}/")
message("including rocksb plugin ${PLUGIN_ROOT}")
set(PLUGINMKFILE "${PLUGIN_ROOT}${PLUGIN}.mk")
if (NOT EXISTS ${PLUGINMKFILE})
message(FATAL_ERROR "Missing plugin makefile: ${PLUGINMKFILE}")
endif()
file(READ ${PLUGINMKFILE} PLUGINMK)
string(REGEX MATCH "SOURCES = ([^\n]*)" FOO ${PLUGINMK})
set(MK_SOURCES ${CMAKE_MATCH_1})
separate_arguments(MK_SOURCES)
foreach(MK_FILE IN LISTS MK_SOURCES)
list(APPEND SOURCES "${PLUGIN_ROOT}${MK_FILE}")
endforeach()
string(REGEX MATCH "_FUNC = ([^\n]*)" FOO ${PLUGINMK})
if (NOT ${CMAKE_MATCH_1} STREQUAL "")
string(APPEND ROCKSDB_PLUGIN_BUILTINS "{\"${PLUGIN}\", " ${CMAKE_MATCH_1} "},")
string(APPEND ROCKSDB_PLUGIN_EXTERNS "int " ${CMAKE_MATCH_1} "(ROCKSDB_NAMESPACE::ObjectLibrary&, const std::string&); ")
endif()
string(REGEX MATCH "_LIBS = ([^\n]*)" FOO ${PLUGINMK})
if (NOT ${CMAKE_MATCH_1} STREQUAL "")
list(APPEND THIRDPARTY_LIBS "${CMAKE_MATCH_1}")
endif()
message("THIRDPARTY_LIBS=${THIRDPARTY_LIBS}")
#TODO: We need to set any compile/link-time flags and add any link libraries
endforeach()
string(TIMESTAMP TS "%Y-%m-%d %H:%M:%S" UTC)
set(BUILD_DATE "${TS}" CACHE STRING "the time we first built rocksdb")
find_package(Git)
if(GIT_FOUND AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_SHA COMMAND "${GIT_EXECUTABLE}" rev-parse HEAD )
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" RESULT_VARIABLE GIT_MOD COMMAND "${GIT_EXECUTABLE}" diff-index HEAD --quiet)
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_DATE COMMAND "${GIT_EXECUTABLE}" log -1 --date=format:"%Y-%m-%d %T" --format="%ad")
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_TAG RESULT_VARIABLE rv COMMAND "${GIT_EXECUTABLE}" symbolic-ref -q --short HEAD OUTPUT_STRIP_TRAILING_WHITESPACE)
if (rv AND NOT rv EQUAL 0)
execute_process(WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" OUTPUT_VARIABLE GIT_TAG COMMAND "${GIT_EXECUTABLE}" describe --tags --exact-match OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
else()
set(GIT_SHA 0)
set(GIT_MOD 1)
endif()
string(REGEX REPLACE "[^0-9a-fA-F]+" "" GIT_SHA "${GIT_SHA}")
string(REGEX REPLACE "[^0-9: /-]+" "" GIT_DATE "${GIT_DATE}")
set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc)
configure_file(util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY)
add_library(${ROCKSDB_STATIC_LIB} STATIC ${SOURCES} ${BUILD_VERSION_CC})
target_link_libraries(${ROCKSDB_STATIC_LIB} PRIVATE
${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
@ -1157,8 +1178,8 @@ if(WITH_TESTS)
list(APPEND TESTS
cache/cache_reservation_manager_test.cc
cache/cache_test.cc
cache/compressed_secondary_cache_test.cc
cache/lru_cache_test.cc
cache/lru_secondary_cache_test.cc
db/blob/blob_counting_iterator_test.cc
db/blob/blob_file_addition_test.cc
db/blob/blob_file_builder_test.cc
@ -1315,6 +1336,7 @@ if(WITH_TESTS)
util/thread_list_test.cc
util/thread_local_test.cc
util/work_queue_test.cc
utilities/agg_merge/agg_merge_test.cc
utilities/backup/backup_engine_test.cc
utilities/blob_db/blob_db_test.cc
utilities/cassandra/cassandra_functional_test.cc
@ -1346,14 +1368,11 @@ if(WITH_TESTS)
)
endif()
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
list(APPEND TESTS third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp)
endif()
set(TESTUTIL_SOURCE
db/db_test_util.cc
monitoring/thread_status_updater_debug.cc
table/mock_table.cc
utilities/agg_merge/test_agg_merge.cc
utilities/cassandra/test_utils.cc
)
enable_testing()

View File

@ -1,5 +1,12 @@
# Rocksdb Change Log
## Unreleased
### New Features
* DB::GetLiveFilesStorageInfo is ready for production use.
### Public API changes
* Add rollback_deletion_type_callback to TransactionDBOptions so that write-prepared transactions know whether to issue a Delete or SingleDelete to cancel a previous key written during prior prepare phase. The PR aims to prevent mixing SingleDeletes and Deletes for the same key that can lead to undefined behaviors for write-prepared transactions.
## 7.2.0 (04/15/2022)
### Bug Fixes
* Fixed bug which caused rocksdb failure in the situation when rocksdb was accessible using UNC path
* Fixed a race condition when 2PC is disabled and WAL tracking in the MANIFEST is enabled. The race condition is between two background flush threads trying to install flush results, causing a WAL deletion not tracked in the MANIFEST. A future DB open may fail.
@ -9,15 +16,30 @@
* Fixed a bug affecting `track_and_verify_wals_in_manifest`. Without the fix, application may see "open error: Corruption: Missing WAL with log number" while trying to open the db. The corruption is a false alarm but prevents DB open (#9766).
* Fix segfault in FilePrefetchBuffer with async_io as it doesn't wait for pending jobs to complete on destruction.
* Fix ERROR_HANDLER_AUTORESUME_RETRY_COUNT stat whose value was set wrong in portal.h
* Fixed a bug for non-TransactionDB with avoid_flush_during_recovery = true and TransactionDB where in case of crash, min_log_number_to_keep may not change on recovery and persisting a new MANIFEST with advanced log_numbers for some column families, results in "column family inconsistency" error on second recovery. As a solution the corrupted WALs whose numbers are larger than the corrupted wal and smaller than the new WAL will be moved to archive folder.
* Fixed a bug in RocksDB DB::Open() which may creates and writes to two new MANIFEST files even before recovery succeeds. Now writes to MANIFEST are persisted only after recovery is successful.
### New Features
* For db_bench when --seed=0 or --seed is not set then it uses the current time as the seed value. Previously it used the value 1000.
* For db_bench when --benchmark lists multiple tests and each test uses a seed for a RNG then the seeds across tests will no longer be repeated.
* Added an option to dynamically charge an updating estimated memory usage of block-based table reader to block cache if block cache available. To enable this feature, set `BlockBasedTableOptions::reserve_table_reader_memory = true`.
* Add new stat ASYNC_READ_BYTES that calculates number of bytes read during async read call and users can check if async code path is being called by RocksDB internal automatic prefetching for sequential reads.
* Enable async prefetching if ReadOptions.readahead_size is set along with ReadOptions.async_io in FilePrefetchBuffer.
* Add event listener support on remote compaction compactor side.
* Added a dedicated integer DB property `rocksdb.live-blob-file-garbage-size` that exposes the total amount of garbage in the blob files in the current version.
* RocksDB does internal auto prefetching if it notices sequential reads. It starts with readahead size `initial_auto_readahead_size` which now can be configured through BlockBasedTableOptions.
* Add a merge operator that allows users to register specific aggregation function so that they can does aggregation using different aggregation types for different keys. See comments in include/rocksdb/utilities/agg_merge.h for actual usage. The feature is experimental and the format is subject to change and we won't provide a migration tool.
* Meta-internal / Experimental: Improve CPU performance by replacing many uses of std::unordered_map with folly::F14FastMap when RocksDB is compiled together with Folly.
* Experimental: Add CompressedSecondaryCache, a concrete implementation of rocksdb::SecondaryCache, that integrates with compression libraries (e.g. LZ4) to hold compressed blocks.
### Behavior changes
* Disallow usage of commit-time-write-batch for write-prepared/write-unprepared transactions if TransactionOptions::use_only_the_last_commit_time_batch_for_recovery is false to prevent two (or more) uncommitted versions of the same key in the database. Otherwise, bottommost compaction may violate the internal key uniqueness invariant of SSTs if the sequence numbers of both internal keys are zeroed out (#9794).
* Make DB::GetUpdatesSince() return NotSupported early for write-prepared/write-unprepared transactions, as the API contract indicates.
### Public API changes
* Exposed APIs to examine results of block cache stats collections in a structured way. In particular, users of `GetMapProperty()` with property `kBlockCacheEntryStats` can now use the functions in `BlockCacheEntryStatsMapKeys` to find stats in the map.
* Add `fail_if_not_bottommost_level` to IngestExternalFileOptions so that ingestion will fail if the file(s) cannot be ingested to the bottommost level.
* Add output parameter `is_in_sec_cache` to `SecondaryCache::Lookup()`. It is to indicate whether the handle is possibly erased from the secondary cache after the Lookup.
## 7.1.0 (03/23/2022)
### New Features

View File

@ -180,8 +180,7 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
* **iOS**:
* Run: `TARGET_OS=IOS make static_lib`. When building the project which uses rocksdb iOS library, make sure to define two important pre-processing macros: `ROCKSDB_LITE` and `IOS_CROSS_COMPILE`.
* **Windows**:
* For building with MS Visual Studio 13 you will need Update 4 installed.
* **Windows** (Visual Studio 2017 to up):
* Read and follow the instructions at CMakeLists.txt
* Or install via [vcpkg](https://github.com/microsoft/vcpkg)
* run `vcpkg install rocksdb:x64-windows`

View File

@ -232,14 +232,20 @@ include make_config.mk
ROCKSDB_PLUGIN_MKS = $(foreach plugin, $(ROCKSDB_PLUGINS), plugin/$(plugin)/*.mk)
include $(ROCKSDB_PLUGIN_MKS)
ROCKSDB_PLUGIN_SOURCES = $(foreach plugin, $(ROCKSDB_PLUGINS), $(foreach source, $($(plugin)_SOURCES), plugin/$(plugin)/$(source)))
ROCKSDB_PLUGIN_HEADERS = $(foreach plugin, $(ROCKSDB_PLUGINS), $(foreach header, $($(plugin)_HEADERS), plugin/$(plugin)/$(header)))
ROCKSDB_PLUGIN_PROTO =ROCKSDB_NAMESPACE::ObjectLibrary\&, const std::string\&
ROCKSDB_PLUGIN_SOURCES = $(foreach p, $(ROCKSDB_PLUGINS), $(foreach source, $($(p)_SOURCES), plugin/$(p)/$(source)))
ROCKSDB_PLUGIN_HEADERS = $(foreach p, $(ROCKSDB_PLUGINS), $(foreach header, $($(p)_HEADERS), plugin/$(p)/$(header)))
ROCKSDB_PLUGIN_LIBS = $(foreach p, $(ROCKSDB_PLUGINS), $(foreach lib, $($(p)_LIBS), -l$(lib)))
ROCKSDB_PLUGIN_W_FUNCS = $(foreach p, $(ROCKSDB_PLUGINS), $(if $($(p)_FUNC), $(p)))
ROCKSDB_PLUGIN_EXTERNS = $(foreach p, $(ROCKSDB_PLUGIN_W_FUNCS), int $($(p)_FUNC)($(ROCKSDB_PLUGIN_PROTO));)
ROCKSDB_PLUGIN_BUILTINS = $(foreach p, $(ROCKSDB_PLUGIN_W_FUNCS), {\"$(p)\"\, $($(p)_FUNC)}\,)
ROCKSDB_PLUGIN_LDFLAGS = $(foreach plugin, $(ROCKSDB_PLUGINS), $($(plugin)_LDFLAGS))
ROCKSDB_PLUGIN_PKGCONFIG_REQUIRES = $(foreach plugin, $(ROCKSDB_PLUGINS), $($(plugin)_PKGCONFIG_REQUIRES))
CXXFLAGS += $(foreach plugin, $(ROCKSDB_PLUGINS), $($(plugin)_CXXFLAGS))
PLATFORM_LDFLAGS += $(ROCKSDB_PLUGIN_LDFLAGS)
# Patch up the link flags for JNI from the plugins
ROCKSDB_PLUGIN_LDFLAGS = $(foreach plugin, $(ROCKSDB_PLUGINS), $($(plugin)_LDFLAGS))
PLATFORM_LDFLAGS += $(ROCKSDB_PLUGIN_LDFLAGS)
JAVA_LDFLAGS += $(ROCKSDB_PLUGIN_LDFLAGS)
JAVA_STATIC_LDFLAGS += $(ROCKSDB_PLUGIN_LDFLAGS)
@ -282,7 +288,7 @@ missing_make_config_paths := $(shell \
grep "\./\S*\|/\S*" -o $(CURDIR)/make_config.mk | \
while read path; \
do [ -e $$path ] || echo $$path; \
done | sort | uniq)
done | sort | uniq | grep -v "/DOES/NOT/EXIST")
$(foreach path, $(missing_make_config_paths), \
$(warning Warning: $(path) does not exist))
@ -334,6 +340,8 @@ endif
# ASAN doesn't work well with jemalloc. If we're compiling with ASAN, we should use regular malloc.
ifdef COMPILE_WITH_ASAN
DISABLE_JEMALLOC=1
ASAN_OPTIONS?=detect_stack_use_after_return=1
export ASAN_OPTIONS
EXEC_LDFLAGS += -fsanitize=address
PLATFORM_CCFLAGS += -fsanitize=address
PLATFORM_CXXFLAGS += -fsanitize=address
@ -394,6 +402,10 @@ ifndef DISABLE_JEMALLOC
ifdef JEMALLOC
PLATFORM_CXXFLAGS += -DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE
PLATFORM_CCFLAGS += -DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE
ifeq ($(USE_FOLLY),1)
PLATFORM_CXXFLAGS += -DUSE_JEMALLOC
PLATFORM_CCFLAGS += -DUSE_JEMALLOC
endif
endif
ifdef WITH_JEMALLOC_FLAG
PLATFORM_LDFLAGS += -ljemalloc
@ -404,8 +416,8 @@ ifndef DISABLE_JEMALLOC
PLATFORM_CCFLAGS += $(JEMALLOC_INCLUDE)
endif
ifndef USE_FOLLY_DISTRIBUTED_MUTEX
USE_FOLLY_DISTRIBUTED_MUTEX=0
ifndef USE_FOLLY
USE_FOLLY=0
endif
ifndef GTEST_THROW_ON_FAILURE
@ -425,8 +437,12 @@ else
PLATFORM_CXXFLAGS += -isystem $(GTEST_DIR)
endif
ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
# This provides a Makefile simulation of a Meta-internal folly integration.
# It is not validated for general use.
ifeq ($(USE_FOLLY),1)
ifeq (,$(FOLLY_DIR))
FOLLY_DIR = ./third-party/folly
endif
# AIX: pre-defined system headers are surrounded by an extern "C" block
ifeq ($(PLATFORM), OS_AIX)
PLATFORM_CCFLAGS += -I$(FOLLY_DIR)
@ -435,6 +451,8 @@ ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
PLATFORM_CCFLAGS += -isystem $(FOLLY_DIR)
PLATFORM_CXXFLAGS += -isystem $(FOLLY_DIR)
endif
PLATFORM_CCFLAGS += -DUSE_FOLLY -DFOLLY_NO_CONFIG
PLATFORM_CXXFLAGS += -DUSE_FOLLY -DFOLLY_NO_CONFIG
endif
ifdef TEST_CACHE_LINE_SIZE
@ -521,7 +539,7 @@ LIB_OBJECTS += $(patsubst %.c, $(OBJ_DIR)/%.o, $(LIB_SOURCES_C))
LIB_OBJECTS += $(patsubst %.S, $(OBJ_DIR)/%.o, $(LIB_SOURCES_ASM))
endif
ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
ifeq ($(USE_FOLLY),1)
LIB_OBJECTS += $(patsubst %.cpp, $(OBJ_DIR)/%.o, $(FOLLY_SOURCES))
endif
@ -556,11 +574,6 @@ ALL_SOURCES += $(ROCKSDB_PLUGIN_SOURCES)
TESTS = $(patsubst %.cc, %, $(notdir $(TEST_MAIN_SOURCES)))
TESTS += $(patsubst %.c, %, $(notdir $(TEST_MAIN_SOURCES_C)))
ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
TESTS += folly_synchronization_distributed_mutex_test
ALL_SOURCES += third-party/folly/folly/synchronization/test/DistributedMutexTest.cc
endif
# `make check-headers` to very that each header file includes its own
# dependencies
ifneq ($(filter check-headers, $(MAKECMDGOALS)),)
@ -585,9 +598,6 @@ am__v_CCH_1 =
check-headers: $(HEADER_OK_FILES)
# options_settable_test doesn't pass with UBSAN as we use hack in the test
ifdef COMPILE_WITH_UBSAN
TESTS := $(shell echo $(TESTS) | sed 's/\boptions_settable_test\b//g')
endif
ifdef ASSERT_STATUS_CHECKED
# TODO: finish fixing all tests to pass this check
TESTS_FAILING_ASC = \
@ -607,10 +617,13 @@ ROCKSDBTESTS_SUBSET ?= $(TESTS)
# env_test - suspicious use of test::TmpDir
# deletefile_test - serial because it generates giant temporary files in
# its various tests. Parallel can fill up your /dev/shm
# db_bloom_filter_test - serial because excessive space usage by instances
# of DBFilterConstructionReserveMemoryTestWithParam can fill up /dev/shm
NON_PARALLEL_TEST = \
c_test \
env_test \
deletefile_test \
db_bloom_filter_test \
PARALLEL_TEST = $(filter-out $(NON_PARALLEL_TEST), $(TESTS))
@ -728,7 +741,7 @@ else
git_mod := $(shell git diff-index HEAD --quiet 2>/dev/null; echo $$?)
git_date := $(shell git log -1 --date=format:"%Y-%m-%d %T" --format="%ad" 2>/dev/null)
endif
gen_build_version = sed -e s/@GIT_SHA@/$(git_sha)/ -e s:@GIT_TAG@:"$(git_tag)": -e s/@GIT_MOD@/"$(git_mod)"/ -e s/@BUILD_DATE@/"$(build_date)"/ -e s/@GIT_DATE@/"$(git_date)"/ util/build_version.cc.in
gen_build_version = sed -e s/@GIT_SHA@/$(git_sha)/ -e s:@GIT_TAG@:"$(git_tag)": -e s/@GIT_MOD@/"$(git_mod)"/ -e s/@BUILD_DATE@/"$(build_date)"/ -e s/@GIT_DATE@/"$(git_date)"/ -e s/@ROCKSDB_PLUGIN_BUILTINS@/'$(ROCKSDB_PLUGIN_BUILTINS)'/ -e s/@ROCKSDB_PLUGIN_EXTERNS@/"$(ROCKSDB_PLUGIN_EXTERNS)"/ util/build_version.cc.in
# Record the version of the source that we are compiling.
# We keep a record of the git revision in this file. It is then built
@ -785,7 +798,7 @@ endif # PLATFORM_SHARED_EXT
.PHONY: blackbox_crash_test check clean coverage crash_test ldb_tests package \
release tags tags0 valgrind_check whitebox_crash_test format static_lib shared_lib all \
dbg rocksdbjavastatic rocksdbjava gen-pc install install-static install-shared uninstall \
analyze tools tools_lib check-headers \
analyze tools tools_lib check-headers checkout_folly \
blackbox_crash_test_with_atomic_flush whitebox_crash_test_with_atomic_flush \
blackbox_crash_test_with_txn whitebox_crash_test_with_txn \
blackbox_crash_test_with_best_efforts_recovery \
@ -1032,31 +1045,31 @@ ldb_tests: ldb
include crash_test.mk
asan_check: clean
ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) check -j32
COMPILE_WITH_ASAN=1 $(MAKE) check -j32
$(MAKE) clean
asan_crash_test: clean
ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) crash_test
COMPILE_WITH_ASAN=1 $(MAKE) crash_test
$(MAKE) clean
whitebox_asan_crash_test: clean
ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) whitebox_crash_test
COMPILE_WITH_ASAN=1 $(MAKE) whitebox_crash_test
$(MAKE) clean
blackbox_asan_crash_test: clean
ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) blackbox_crash_test
COMPILE_WITH_ASAN=1 $(MAKE) blackbox_crash_test
$(MAKE) clean
asan_crash_test_with_atomic_flush: clean
ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_atomic_flush
COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_atomic_flush
$(MAKE) clean
asan_crash_test_with_txn: clean
ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_txn
COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_txn
$(MAKE) clean
asan_crash_test_with_best_efforts_recovery: clean
ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_best_efforts_recovery
COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_best_efforts_recovery
$(MAKE) clean
ubsan_check: clean
@ -1300,11 +1313,6 @@ trace_analyzer: $(OBJ_DIR)/tools/trace_analyzer.o $(ANALYZE_OBJECTS) $(TOOLS_LIB
block_cache_trace_analyzer: $(OBJ_DIR)/tools/block_cache_analyzer/block_cache_trace_analyzer_tool.o $(ANALYZE_OBJECTS) $(TOOLS_LIBRARY) $(LIBRARY)
$(AM_LINK)
ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
folly_synchronization_distributed_mutex_test: $(OBJ_DIR)/third-party/folly/folly/synchronization/test/DistributedMutexTest.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
endif
cache_bench: $(OBJ_DIR)/cache/cache_bench.o $(CACHE_BENCH_OBJECTS) $(LIBRARY)
$(AM_LINK)
@ -1371,6 +1379,9 @@ ribbon_test: $(OBJ_DIR)/util/ribbon_test.o $(TEST_LIBRARY) $(LIBRARY)
option_change_migration_test: $(OBJ_DIR)/utilities/option_change_migration/option_change_migration_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
agg_merge_test: $(OBJ_DIR)/utilities/agg_merge/agg_merge_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
stringappend_test: $(OBJ_DIR)/utilities/merge_operators/string_append/stringappend_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
@ -1842,7 +1853,7 @@ statistics_test: $(OBJ_DIR)/monitoring/statistics_test.o $(TEST_LIBRARY) $(LIBRA
stats_history_test: $(OBJ_DIR)/monitoring/stats_history_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
lru_secondary_cache_test: $(OBJ_DIR)/cache/lru_secondary_cache_test.o $(TEST_LIBRARY) $(LIBRARY)
compressed_secondary_cache_test: $(OBJ_DIR)/cache/compressed_secondary_cache_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)
lru_cache_test: $(OBJ_DIR)/cache/lru_cache_test.o $(TEST_LIBRARY) $(LIBRARY)
@ -2377,6 +2388,22 @@ commit_prereq:
false # J=$(J) build_tools/precommit_checker.py unit clang_unit release clang_release tsan asan ubsan lite unit_non_shm
# $(MAKE) clean && $(MAKE) jclean && $(MAKE) rocksdbjava;
# For public CI runs, checkout folly in a way that can build with RocksDB.
# This is mostly intended as a test-only simulation of Meta-internal folly
# integration.
checkout_folly:
if [ -e third-party/folly ]; then \
cd third-party/folly && git fetch origin; \
else \
cd third-party && git clone https://github.com/facebook/folly.git; \
fi
@# Pin to a particular version for public CI, so that PR authors don't
@# need to worry about folly breaking our integration. Update periodically
cd third-party/folly && git reset --hard 98b9b2c1124e99f50f9085ddee74ce32afffc665
@# A hack to remove boost dependency.
@# NOTE: this hack is not needed if using FBCODE compiler config
perl -pi -e 's/^(#include <boost)/\/\/$$1/' third-party/folly/folly/functional/Invoke.h
# ---------------------------------------------------------------------------
# Platform-specific compilation
# ---------------------------------------------------------------------------
@ -2429,7 +2456,7 @@ endif
ifneq ($(SKIP_DEPENDS), 1)
DEPFILES = $(patsubst %.cc, $(OBJ_DIR)/%.cc.d, $(ALL_SOURCES))
DEPFILES+ = $(patsubst %.c, $(OBJ_DIR)/%.c.d, $(LIB_SOURCES_C) $(TEST_MAIN_SOURCES_C))
ifeq ($(USE_FOLLY_DISTRIBUTED_MUTEX),1)
ifeq ($(USE_FOLLY),1)
DEPFILES +=$(patsubst %.cpp, $(OBJ_DIR)/%.cpp.d, $(FOLLY_SOURCES))
endif
endif
@ -2477,7 +2504,7 @@ list_all_tests:
# Remove the rules for which dependencies should not be generated and see if any are left.
#If so, include the dependencies; if not, do not include the dependency files
ROCKS_DEP_RULES=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean jtest package analyze tags rocksdbjavastatic% unity.% unity_test, $(MAKECMDGOALS))
ROCKS_DEP_RULES=$(filter-out clean format check-format check-buck-targets check-headers check-sources jclean jtest package analyze tags rocksdbjavastatic% unity.% unity_test checkout_folly, $(MAKECMDGOALS))
ifneq ("$(ROCKS_DEP_RULES)", "")
-include $(DEPFILES)
endif

85
TARGETS
View File

@ -14,8 +14,8 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
"cache/cache_key.cc",
"cache/cache_reservation_manager.cc",
"cache/clock_cache.cc",
"cache/compressed_secondary_cache.cc",
"cache/lru_cache.cc",
"cache/lru_secondary_cache.cc",
"cache/sharded_cache.cc",
"db/arena_wrapped_db_iter.cc",
"db/blob/blob_fetcher.cc",
@ -245,6 +245,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
"util/thread_local.cc",
"util/threadpool_imp.cc",
"util/xxhash.cc",
"utilities/agg_merge/agg_merge.cc",
"utilities/backup/backup_engine.cc",
"utilities/blob_db/blob_compaction_filter.cc",
"utilities/blob_db/blob_db.cc",
@ -324,7 +325,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
"utilities/wal_filter.cc",
"utilities/write_batch_with_index/write_batch_with_index.cc",
"utilities/write_batch_with_index/write_batch_with_index_internal.cc",
], deps=[], headers=None, link_whole=False, extra_test_libs=False)
], deps=["//folly/container:f14_hash"], headers=None, link_whole=False, extra_test_libs=False)
cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
"cache/cache.cc",
@ -332,8 +333,8 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
"cache/cache_key.cc",
"cache/cache_reservation_manager.cc",
"cache/clock_cache.cc",
"cache/compressed_secondary_cache.cc",
"cache/lru_cache.cc",
"cache/lru_secondary_cache.cc",
"cache/sharded_cache.cc",
"db/arena_wrapped_db_iter.cc",
"db/blob/blob_fetcher.cc",
@ -563,6 +564,7 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
"util/thread_local.cc",
"util/threadpool_imp.cc",
"util/xxhash.cc",
"utilities/agg_merge/agg_merge.cc",
"utilities/backup/backup_engine.cc",
"utilities/blob_db/blob_compaction_filter.cc",
"utilities/blob_db/blob_db.cc",
@ -642,7 +644,7 @@ cpp_library_wrapper(name="rocksdb_whole_archive_lib", srcs=[
"utilities/wal_filter.cc",
"utilities/write_batch_with_index/write_batch_with_index.cc",
"utilities/write_batch_with_index/write_batch_with_index_internal.cc",
], deps=[], headers=None, link_whole=True, extra_test_libs=False)
], deps=["//folly/container:f14_hash"], headers=None, link_whole=True, extra_test_libs=False)
cpp_library_wrapper(name="rocksdb_test_lib", srcs=[
"db/db_test_util.cc",
@ -652,6 +654,7 @@ cpp_library_wrapper(name="rocksdb_test_lib", srcs=[
"test_util/testutil.cc",
"tools/block_cache_analyzer/block_cache_trace_analyzer.cc",
"tools/trace_analyzer_tool.cc",
"utilities/agg_merge/test_agg_merge.cc",
"utilities/cassandra/test_utils.cc",
], deps=[":rocksdb_lib"], headers=None, link_whole=False, extra_test_libs=True)
@ -685,6 +688,8 @@ rocks_cpp_library_wrapper(name="rocksdb_stress_lib", srcs=[
], headers=None)
cpp_binary_wrapper(name="db_stress", srcs=["db_stress_tool/db_stress.cc"], deps=[":rocksdb_stress_lib"], extra_preprocessor_flags=[], extra_bench_libs=False)
cpp_binary_wrapper(name="ribbon_bench", srcs=["microbench/ribbon_bench.cc"], deps=[], extra_preprocessor_flags=[], extra_bench_libs=True)
cpp_binary_wrapper(name="db_basic_bench", srcs=["microbench/db_basic_bench.cc"], deps=[], extra_preprocessor_flags=[], extra_bench_libs=True)
@ -711,9 +716,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_0", binary_to_bench_to_
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1/iterations:51200/threads:8': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'DBPut/comp_style:2/max_data:107374182400/per_key_size:256/enable_statistics:0/wal:0/iterations:51200/threads:8': ['real_time',
'cpu_time',
@ -769,16 +772,12 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_1", binary_to_bench_to_
'DBPut/comp_style:2/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'DBPut/comp_style:2/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1/iterations:51200/threads:8': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads']},
'ribbon_bench': {'FilterBuild/filter_impl:2/bits_per_key:20/key_len_avg:100/entry_num:1024': ['real_time',
'cpu_time',
@ -1018,9 +1017,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_6", binary_to_bench_to_
'DBPut/comp_style:2/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:51200/threads:8': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads']},
'ribbon_bench': {'FilterBuild/filter_impl:2/bits_per_key:10/key_len_avg:100/entry_num:1024': ['real_time',
'cpu_time',
@ -1059,16 +1056,12 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_7", binary_to_bench_to_
'DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:51200/threads:8': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'RandomAccessFileReaderRead/enable_statistics:0/iterations:1000000': ['real_time',
'cpu_time',
@ -1113,9 +1106,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_8", binary_to_bench_to_
'DBPut/comp_style:2/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads']},
'ribbon_bench': {'FilterBuild/filter_impl:0/bits_per_key:20/key_len_avg:10/entry_num:1048576': ['real_time',
'cpu_time',
@ -1209,9 +1200,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_10", binary_to_bench_to
'DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads']},
'ribbon_bench': {'FilterBuild/filter_impl:3/bits_per_key:10/key_len_avg:10/entry_num:1024': ['real_time',
'cpu_time',
@ -1243,16 +1232,12 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_11", binary_to_bench_to
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:51200/threads:8': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads']}}, slow=False, expected_runtime=2446, sl_iterations=3, regression_threshold=10)
@ -1337,9 +1322,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_13", binary_to_bench_to
'DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads']},
'ribbon_bench': {'FilterBuild/filter_impl:2/bits_per_key:20/key_len_avg:100/entry_num:1048576': ['real_time',
'cpu_time',
@ -1380,9 +1363,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_14", binary_to_bench_to
'DBPut/comp_style:0/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:1/iterations:51200/threads:8': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads']},
'ribbon_bench': {'FilterBuild/filter_impl:0/bits_per_key:10/key_len_avg:10/entry_num:1024': ['real_time',
'cpu_time',
@ -1481,9 +1462,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_0_slow", binary_to_benc
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -1704,9 +1683,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_1_slow", binary_to_benc
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -1927,9 +1904,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_2_slow", binary_to_benc
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -2150,9 +2125,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_3_slow", binary_to_benc
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -2373,9 +2346,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_4_slow", binary_to_benc
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -2596,9 +2567,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_5_slow", binary_to_benc
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -2819,9 +2788,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_6_slow", binary_to_benc
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -3042,9 +3009,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_7_slow", binary_to_benc
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -3265,9 +3230,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_8_slow", binary_to_benc
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -3488,9 +3451,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_9_slow", binary_to_benc
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -3711,9 +3672,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_10_slow", binary_to_ben
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -3934,9 +3893,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_11_slow", binary_to_ben
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -4157,9 +4114,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_12_slow", binary_to_ben
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -4380,9 +4335,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_13_slow", binary_to_ben
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -4603,9 +4556,7 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_14_slow", binary_to_ben
'DBPut/comp_style:1/max_data:107374182400/per_key_size:256/enable_statistics:1/wal:0/iterations:409600/threads:1': ['real_time',
'put_mean',
'cpu_time',
'put_p99',
'db_size',
'put_p95',
'threads'],
'IteratorPrev/comp_style:1/max_data:536870912/per_key_size:256/iterations:10240': ['real_time',
'cpu_time',
@ -4750,6 +4701,12 @@ fancy_bench_wrapper(suite_name="rocksdb_microbench_suite_14_slow", binary_to_ben
# Do not build the tests in opt mode, since SyncPoint and other test code
# will not be included.
cpp_unittest_wrapper(name="agg_merge_test",
srcs=["utilities/agg_merge/agg_merge_test.cc"],
deps=[":rocksdb_test_lib"],
extra_compiler_flags=[])
cpp_unittest_wrapper(name="arena_test",
srcs=["memory/arena_test.cc"],
deps=[":rocksdb_test_lib"],
@ -4984,6 +4941,12 @@ cpp_unittest_wrapper(name="comparator_db_test",
extra_compiler_flags=[])
cpp_unittest_wrapper(name="compressed_secondary_cache_test",
srcs=["cache/compressed_secondary_cache_test.cc"],
deps=[":rocksdb_test_lib"],
extra_compiler_flags=[])
cpp_unittest_wrapper(name="configurable_test",
srcs=["options/configurable_test.cc"],
deps=[":rocksdb_test_lib"],
@ -5478,12 +5441,6 @@ cpp_unittest_wrapper(name="lru_cache_test",
extra_compiler_flags=[])
cpp_unittest_wrapper(name="lru_secondary_cache_test",
srcs=["cache/lru_secondary_cache_test.cc"],
deps=[":rocksdb_test_lib"],
extra_compiler_flags=[])
cpp_unittest_wrapper(name="manual_compaction_test",
srcs=["db/manual_compaction_test.cc"],
deps=[":rocksdb_test_lib"],

View File

@ -133,9 +133,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 360.304737
@ -154,9 +152,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 379.059546
@ -492,9 +488,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 378.530036
@ -504,9 +498,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2153.984468
@ -525,9 +517,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 271.474125
@ -892,9 +882,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 378.808772
@ -1260,9 +1248,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 360.330841
@ -1272,9 +1258,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2423.955419
@ -1643,9 +1627,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 271.438709
@ -1655,9 +1637,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2149.877539
@ -2005,9 +1985,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 378.81272
@ -2017,9 +1995,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2157.656253
@ -2029,9 +2005,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 271.523747
@ -2339,9 +2313,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2201.425905
@ -2351,9 +2323,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2153.250796
@ -3094,9 +3064,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2473.029062
@ -4065,9 +4033,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2473.313812
@ -4077,9 +4043,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2167.374891
@ -4089,9 +4053,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2350.198115
@ -4110,9 +4072,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 378.072129
@ -4942,9 +4902,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2312.17547
@ -4954,9 +4912,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 2199.255566
@ -5440,9 +5396,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 360.424861
@ -5878,9 +5832,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 377.714354

View File

@ -38,9 +38,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 271.438709
@ -172,9 +170,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 378.072129
@ -184,9 +180,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 271.474125
@ -749,9 +743,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 378.808772
@ -841,9 +833,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 378.81272
@ -853,9 +843,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 360.424861
@ -961,9 +949,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 271.523747
@ -1179,9 +1165,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 360.304737
@ -1254,9 +1238,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 377.714354
@ -1266,9 +1248,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 379.059546
@ -1463,9 +1443,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 378.530036
@ -1562,9 +1540,7 @@
"real_time",
"put_mean",
"cpu_time",
"put_p99",
"db_size",
"put_p95",
"threads",
{
"est_runtime": 360.330841

10
buckifier/buckify_rocksdb.py Normal file → Executable file
View File

@ -1,3 +1,4 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
@ -143,7 +144,8 @@ def generate_targets(repo_path, deps_map):
src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"])
src_mk["TOOL_LIB_SOURCES"],
deps=["//folly/container:f14_hash"])
# rocksdb_whole_archive_lib
TARGETS.add_library(
"rocksdb_whole_archive_lib",
@ -151,7 +153,7 @@ def generate_targets(repo_path, deps_map):
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
deps=None,
deps=["//folly/container:f14_hash"],
headers=None,
extra_external_deps="",
link_whole=True)
@ -183,6 +185,10 @@ def generate_targets(repo_path, deps_map):
src_mk.get("ANALYZER_LIB_SOURCES", [])
+ src_mk.get('STRESS_LIB_SOURCES', [])
+ ["test_util/testutil.cc"])
# db_stress binary
TARGETS.add_binary("db_stress",
["db_stress_tool/db_stress.cc"],
[":rocksdb_stress_lib"])
# bench binaries
for src in src_mk.get("MICROBENCH_SOURCES", []):
name = src.rsplit('/',1)[1].split('.')[0] if '/' in src else src.split('.')[0]

View File

@ -63,7 +63,13 @@ if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
if [ "$LIB_MODE" == "shared" ]; then
PIC_BUILD=1
fi
if [ -n "$ROCKSDB_FBCODE_BUILD_WITH_PLATFORM010" ]; then
source "$PWD/build_tools/fbcode_config_platform010.sh"
elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_PLATFORM009" ]; then
source "$PWD/build_tools/fbcode_config_platform009.sh"
else
source "$PWD/build_tools/fbcode_config_platform009.sh"
fi
fi
# Delete existing output, if it exists
@ -874,8 +880,8 @@ if test -n "$WITH_JEMALLOC_FLAG"; then
echo "WITH_JEMALLOC_FLAG=$WITH_JEMALLOC_FLAG" >> "$OUTPUT"
fi
echo "LUA_PATH=$LUA_PATH" >> "$OUTPUT"
if test -n "$USE_FOLLY_DISTRIBUTED_MUTEX"; then
echo "USE_FOLLY_DISTRIBUTED_MUTEX=$USE_FOLLY_DISTRIBUTED_MUTEX" >> "$OUTPUT"
if test -n "$USE_FOLLY"; then
echo "USE_FOLLY=$USE_FOLLY" >> "$OUTPUT"
fi
if test -n "$PPC_LIBC_IS_GNU"; then
echo "PPC_LIBC_IS_GNU=$PPC_LIBC_IS_GNU" >> "$OUTPUT"

View File

@ -19,3 +19,4 @@ BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/08634589372fa5f237bfd374e8c644a836
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/6ae525939ad02e5e676855082fbbc7828dbafeac/3.15.0/platform009/7f3b187
LUA_BASE=/mnt/gvfs/third-party2/lua/162efd9561a3d21f6869f4814011e9cf1b3ff4dc/5.3.4/platform009/a6271c4
BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/30bf49ad6414325e17f3425b0edcb64239427ae3/1.6.1/platform009/7f3b187
BOOST_BASE=/mnt/gvfs/third-party2/boost/201b7d74941e54b436dfa364a063aa6d2cd7de4c/1.69.0/platform009/8a7ffdf

View File

@ -0,0 +1,22 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# The file is generated using update_dependencies.sh.
GCC_BASE=/mnt/gvfs/third-party2/gcc/e40bde78650fa91b8405a857e3f10bf336633fb0/11.x/centos7-native/886b5eb
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/2043340983c032915adbb6f78903dc855b65aee8/12/platform010/9520e0f
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/c00dcc6a3e4125c7e8b248e9a79c14b78ac9e0ca/11.x/platform010/5684a5a
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0b9c8e4b060eda62f3bc1c6127bbe1256697569b/2.34/platform010/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/bc9647f7912b131315827d65cb6189c21f381d05/1.1.3/platform010/76ebdda
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/a6f5f3f1d063d2d00cd02fc12f0f05fc3ab3a994/1.2.11/platform010/76ebdda
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/09703139cfc376bd8a82642385a0e97726b28287/1.0.6/platform010/76ebdda
LZ4_BASE=/mnt/gvfs/third-party2/lz4/60220d6a5bf7722b9cc239a1368c596619b12060/1.9.1/platform010/76ebdda
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/50eace8143eaaea9473deae1f3283e0049e05633/1.4.x/platform010/64091f4
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/5d27e5919771603da06000a027b12f799e58a4f7/2.2.0/platform010/76ebdda
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/b62912d333ef33f9760efa6219dbe3fe6abb3b0e/master/platform010/f57cc4a
NUMA_BASE=/mnt/gvfs/third-party2/numa/6b412770957aa3c8a87e5e0dcd8cc2f45f393bc0/2.0.11/platform010/76ebdda
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/52f69816e936e147664ad717eb71a1a0e9dc973a/1.4/platform010/5074a48
TBB_BASE=/mnt/gvfs/third-party2/tbb/c9cc192099fa84c0dcd0ffeedd44a373ad6e4925/2018_U5/platform010/76ebdda
LIBURING_BASE=/mnt/gvfs/third-party2/liburing/a98e2d137007e3ebf7f33bd6f99c2c56bdaf8488/20210212/platform010/76ebdda
BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/780c7a0f9cf0967961e69ad08e61cddd85d61821/trunk/platform010/76ebdda
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/02d9f76aaaba580611cf75e741753c800c7fdc12/fb/platform010/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/938dc3f064ef3a48c0446f5b11d788d50b3eb5ee/2.37/centos7-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/429a6b3203eb415f1599bd15183659153129188e/3.15.0/platform010/76ebdda
LUA_BASE=/mnt/gvfs/third-party2/lua/363787fa5cac2a8aa20638909210443278fa138e/5.3.4/platform010/9079c97

View File

@ -21,38 +21,48 @@ LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
else
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
fi
CFLAGS+=" -DSNAPPY"
if test -z $PIC_BUILD; then
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
fi
fi
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
else
else
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY"
fi
CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY"
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
@ -162,6 +172,4 @@ else
LUA_LIB=" $LUA_PATH/lib/liblua_pic.a"
fi
USE_FOLLY_DISTRIBUTED_MUTEX=1
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

View File

@ -27,28 +27,38 @@ else
MAYBE_PIC=_pic
fi
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
CFLAGS+=" -DSNAPPY"
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
CFLAGS+=" -DSNAPPY"
fi
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
CFLAGS+=" -DZLIB"
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
CFLAGS+=" -DZLIB"
fi
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
CFLAGS+=" -DBZIP2"
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
CFLAGS+=" -DBZIP2"
fi
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
CFLAGS+=" -DLZ4"
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
CFLAGS+=" -DLZ4"
fi
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
CFLAGS+=" -DZSTD"
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
CFLAGS+=" -DZSTD"
fi
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
@ -58,6 +68,8 @@ CFLAGS+=" -DGFLAGS=gflags"
BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/"
BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a"
BOOST_INCLUDE=" -I $BOOST_BASE/include/"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
@ -89,7 +101,7 @@ BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
AS="$BINUTILS/as"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE $BOOST_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"

View File

@ -0,0 +1,166 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ and clang compilers and also
# uses jemalloc
# Environment variables that change the behavior of this script:
# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_platform010.sh"
# Disallow using libraries from default locations as they might not be compatible with platform010 libraries.
CFLAGS=" --sysroot=/DOES/NOT/EXIST"
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/trunk"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib -B$LIBGCC_BASE/lib/gcc/x86_64-facebook-linux/trunk/"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
GLIBC_LIBS+=" -B$GLIBC_BASE/lib"
if test -z $PIC_BUILD; then
MAYBE_PIC=
else
MAYBE_PIC=_pic
fi
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
CFLAGS+=" -DSNAPPY"
fi
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
CFLAGS+=" -DLZ4"
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
CFLAGS+=" -DZSTD"
fi
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags${MAYBE_PIC}.a"
CFLAGS+=" -DGFLAGS=gflags"
BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/"
BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma${MAYBE_PIC}.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind${MAYBE_PIC}.a"
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
TBB_LIBS="$TBB_BASE/lib/libtbb${MAYBE_PIC}.a"
CFLAGS+=" -DTBB"
# location of LIBURING
LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/"
LIBURING_LIBS="$LIBURING_BASE/lib/liburing${MAYBE_PIC}.a"
CFLAGS+=" -DLIBURING"
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
AS="$BINUTILS/as"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/clang/tools/scan-build/bin/scan-build"
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
JEMALLOC=1
else
# clang
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -Wno-expansion-to-defined "
CXXFLAGS="-nostdinc++"
fi
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform010/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform010/lib"
EXEC_LDFLAGS+=" -Wl,-rpath=$GCC_BASE/lib64"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
PLATFORM_LDFLAGS+=" -B$BINUTILS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
export CC CXX AR AS CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

View File

@ -9,6 +9,7 @@ OUTPUT=""
function log_header()
{
echo "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved." >> "$OUTPUT"
echo "# The file is generated using update_dependencies.sh." >> "$OUTPUT"
}
@ -18,7 +19,7 @@ function log_variable()
}
TP2_LATEST="/mnt/vol/engshare/fbcode/third-party2"
TP2_LATEST="/data/users/$USER/fbsource/fbcode/third-party2/"
## $1 => lib name
## $2 => lib version (if not provided, will try to pick latest)
## $3 => platform (if not provided, will try to pick latest gcc)
@ -51,6 +52,8 @@ function get_lib_base()
result=`ls -1d $result/*/ | head -n1`
echo Finding link $result
# lib_name => LIB_NAME_BASE
local __res_var=${lib_name^^}"_BASE"
__res_var=`echo $__res_var | tr - _`
@ -61,10 +64,10 @@ function get_lib_base()
}
###########################################################
# platform007 dependencies #
# platform010 dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies_platform007.sh"
OUTPUT="$BASEDIR/dependencies_platform010.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
@ -72,40 +75,42 @@ touch "$OUTPUT"
echo "Writing dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/7.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/`
GCC_BASE=`readlink -f $TP2_LATEST/gcc/11.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/12/platform010/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 7.x platform007
get_lib_base glibc 2.26 platform007
get_lib_base snappy LATEST platform007
get_lib_base zlib LATEST platform007
get_lib_base bzip2 LATEST platform007
get_lib_base lz4 LATEST platform007
get_lib_base zstd LATEST platform007
get_lib_base gflags LATEST platform007
get_lib_base jemalloc LATEST platform007
get_lib_base numa LATEST platform007
get_lib_base libunwind LATEST platform007
get_lib_base tbb LATEST platform007
get_lib_base liburing LATEST platform007
get_lib_base libgcc 11.x platform010
get_lib_base glibc 2.34 platform010
get_lib_base snappy LATEST platform010
get_lib_base zlib LATEST platform010
get_lib_base bzip2 LATEST platform010
get_lib_base lz4 LATEST platform010
get_lib_base zstd LATEST platform010
get_lib_base gflags LATEST platform010
get_lib_base jemalloc LATEST platform010
get_lib_base numa LATEST platform010
get_lib_base libunwind LATEST platform010
get_lib_base tbb 2018_U5 platform010
get_lib_base liburing LATEST platform010
get_lib_base benchmark LATEST platform010
get_lib_base kernel-headers fb platform007
get_lib_base kernel-headers fb platform010
get_lib_base binutils LATEST centos7-native
get_lib_base valgrind LATEST platform007
get_lib_base lua 5.3.4 platform007
get_lib_base valgrind LATEST platform010
get_lib_base lua 5.3.4 platform010
git diff $OUTPUT
###########################################################
# 5.x dependencies #
# platform009 dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies.sh"
OUTPUT="$BASEDIR/dependencies_platform009.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
@ -113,70 +118,32 @@ touch "$OUTPUT"
echo "Writing dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/5.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/`
GCC_BASE=`readlink -f $TP2_LATEST/gcc/9.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/9.0.0/platform009/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 5.x gcc-5-glibc-2.23
get_lib_base glibc 2.23 gcc-5-glibc-2.23
get_lib_base snappy LATEST gcc-5-glibc-2.23
get_lib_base zlib LATEST gcc-5-glibc-2.23
get_lib_base bzip2 LATEST gcc-5-glibc-2.23
get_lib_base lz4 LATEST gcc-5-glibc-2.23
get_lib_base zstd LATEST gcc-5-glibc-2.23
get_lib_base gflags LATEST gcc-5-glibc-2.23
get_lib_base jemalloc LATEST gcc-5-glibc-2.23
get_lib_base numa LATEST gcc-5-glibc-2.23
get_lib_base libunwind LATEST gcc-5-glibc-2.23
get_lib_base tbb LATEST gcc-5-glibc-2.23
get_lib_base libgcc 9.x platform009
get_lib_base glibc 2.30 platform009
get_lib_base snappy LATEST platform009
get_lib_base zlib LATEST platform009
get_lib_base bzip2 LATEST platform009
get_lib_base lz4 LATEST platform009
get_lib_base zstd LATEST platform009
get_lib_base gflags LATEST platform009
get_lib_base jemalloc LATEST platform009
get_lib_base numa LATEST platform009
get_lib_base libunwind LATEST platform009
get_lib_base tbb 2018_U5 platform009
get_lib_base liburing LATEST platform009
get_lib_base benchmark LATEST platform009
get_lib_base kernel-headers 4.0.9-36_fbk5_2933_gd092e3f gcc-5-glibc-2.23
get_lib_base kernel-headers fb platform009
get_lib_base binutils LATEST centos7-native
get_lib_base valgrind LATEST gcc-5-glibc-2.23
get_lib_base lua 5.2.3 gcc-5-glibc-2.23
git diff $OUTPUT
###########################################################
# 4.8.1 dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies_4.8.1.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
echo "Writing 4.8.1 dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.8.1/centos6-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos6-native/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 4.8.1 gcc-4.8.1-glibc-2.17
get_lib_base glibc 2.17 gcc-4.8.1-glibc-2.17
get_lib_base snappy LATEST gcc-4.8.1-glibc-2.17
get_lib_base zlib LATEST gcc-4.8.1-glibc-2.17
get_lib_base bzip2 LATEST gcc-4.8.1-glibc-2.17
get_lib_base lz4 LATEST gcc-4.8.1-glibc-2.17
get_lib_base zstd LATEST gcc-4.8.1-glibc-2.17
get_lib_base gflags LATEST gcc-4.8.1-glibc-2.17
get_lib_base jemalloc LATEST gcc-4.8.1-glibc-2.17
get_lib_base numa LATEST gcc-4.8.1-glibc-2.17
get_lib_base libunwind LATEST gcc-4.8.1-glibc-2.17
get_lib_base tbb 4.0_update2 gcc-4.8.1-glibc-2.17
get_lib_base kernel-headers LATEST gcc-4.8.1-glibc-2.17
get_lib_base binutils LATEST centos6-native
get_lib_base valgrind 3.8.1 gcc-4.8.1-glibc-2.17
get_lib_base lua 5.2.3 centos6-native
get_lib_base valgrind LATEST platform009
get_lib_base lua 5.3.4 platform009
git diff $OUTPUT

View File

@ -11,7 +11,7 @@
namespace ROCKSDB_NAMESPACE {
std::array<const char*, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
"DataBlock",
"FilterBlock",
"FilterMetaBlock",
@ -25,7 +25,7 @@ std::array<const char*, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
"Misc",
}};
std::array<const char*, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
"data-block",
"filter-block",
"filter-meta-block",
@ -39,16 +39,72 @@ std::array<const char*, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
"misc",
}};
const std::string& GetCacheEntryRoleName(CacheEntryRole role) {
return kCacheEntryRoleToHyphenString[static_cast<size_t>(role)];
}
const std::string& BlockCacheEntryStatsMapKeys::CacheId() {
static const std::string kCacheId = "id";
return kCacheId;
}
const std::string& BlockCacheEntryStatsMapKeys::CacheCapacityBytes() {
static const std::string kCacheCapacityBytes = "capacity";
return kCacheCapacityBytes;
}
const std::string&
BlockCacheEntryStatsMapKeys::LastCollectionDurationSeconds() {
static const std::string kLastCollectionDurationSeconds =
"secs_for_last_collection";
return kLastCollectionDurationSeconds;
}
const std::string& BlockCacheEntryStatsMapKeys::LastCollectionAgeSeconds() {
static const std::string kLastCollectionAgeSeconds =
"secs_since_last_collection";
return kLastCollectionAgeSeconds;
}
namespace {
std::string GetPrefixedCacheEntryRoleName(const std::string& prefix,
CacheEntryRole role) {
const std::string& role_name = GetCacheEntryRoleName(role);
std::string prefixed_role_name;
prefixed_role_name.reserve(prefix.size() + role_name.size());
prefixed_role_name.append(prefix);
prefixed_role_name.append(role_name);
return prefixed_role_name;
}
} // namespace
std::string BlockCacheEntryStatsMapKeys::EntryCount(CacheEntryRole role) {
const static std::string kPrefix = "count.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
std::string BlockCacheEntryStatsMapKeys::UsedBytes(CacheEntryRole role) {
const static std::string kPrefix = "bytes.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
std::string BlockCacheEntryStatsMapKeys::UsedPercent(CacheEntryRole role) {
const static std::string kPrefix = "percent.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
namespace {
struct Registry {
std::mutex mutex;
std::unordered_map<Cache::DeleterFn, CacheEntryRole> role_map;
UnorderedMap<Cache::DeleterFn, CacheEntryRole> role_map;
void Register(Cache::DeleterFn fn, CacheEntryRole role) {
std::lock_guard<std::mutex> lock(mutex);
role_map[fn] = role;
}
std::unordered_map<Cache::DeleterFn, CacheEntryRole> Copy() {
UnorderedMap<Cache::DeleterFn, CacheEntryRole> Copy() {
std::lock_guard<std::mutex> lock(mutex);
return role_map;
}
@ -65,7 +121,7 @@ void RegisterCacheDeleterRole(Cache::DeleterFn fn, CacheEntryRole role) {
GetRegistry().Register(fn, role);
}
std::unordered_map<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap() {
UnorderedMap<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap() {
return GetRegistry().Copy();
}

View File

@ -9,49 +9,15 @@
#include <cstdint>
#include <memory>
#include <type_traits>
#include <unordered_map>
#include "rocksdb/cache.h"
#include "util/hash_containers.h"
namespace ROCKSDB_NAMESPACE {
// Classifications of block cache entries, for reporting statistics
// Adding new enum to this class requires corresponding updates to
// kCacheEntryRoleToCamelString and kCacheEntryRoleToHyphenString
enum class CacheEntryRole {
// Block-based table data block
kDataBlock,
// Block-based table filter block (full or partitioned)
kFilterBlock,
// Block-based table metadata block for partitioned filter
kFilterMetaBlock,
// Block-based table deprecated filter block (old "block-based" filter)
kDeprecatedFilterBlock,
// Block-based table index block
kIndexBlock,
// Other kinds of block-based table block
kOtherBlock,
// WriteBufferManager reservations to account for memtable usage
kWriteBuffer,
// BlockBasedTableBuilder reservations to account for
// compression dictionary building buffer's memory usage
kCompressionDictionaryBuildingBuffer,
// Filter reservations to account for
// (new) bloom and ribbon filter construction's memory usage
kFilterConstruction,
// BlockBasedTableReader reservations to account for
// its memory usage
kBlockBasedTableReader,
// Default bucket, for miscellaneous cache entries. Do not use for
// entries that could potentially add up to large usage.
kMisc,
};
constexpr uint32_t kNumCacheEntryRoles =
static_cast<uint32_t>(CacheEntryRole::kMisc) + 1;
extern std::array<const char*, kNumCacheEntryRoles>
extern std::array<std::string, kNumCacheEntryRoles>
kCacheEntryRoleToCamelString;
extern std::array<const char*, kNumCacheEntryRoles>
extern std::array<std::string, kNumCacheEntryRoles>
kCacheEntryRoleToHyphenString;
// To associate cache entries with their role, we use a hack on the
@ -78,7 +44,7 @@ void RegisterCacheDeleterRole(Cache::DeleterFn fn, CacheEntryRole role);
// * This is suitable for preparing for batch operations, like with
// CacheEntryStatsCollector.
// * The number of mappings should be sufficiently small (dozens).
std::unordered_map<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap();
UnorderedMap<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap();
// ************************************************************** //
// An automatic registration infrastructure. This enables code

View File

@ -47,13 +47,14 @@ TEST_F(CacheReservationManagerTest, GenerateCacheKey) {
// Next unique Cache key
CacheKey ckey = CacheKey::CreateUniqueForCacheLifetime(cache.get());
// Back it up to the one used by CRM (using CacheKey implementation details)
using PairU64 = std::pair<uint64_t, uint64_t>;
// Get to the underlying values
using PairU64 = std::array<uint64_t, 2>;
auto& ckey_pair = *reinterpret_cast<PairU64*>(&ckey);
ckey_pair.second--;
// Back it up to the one used by CRM (using CacheKey implementation details)
ckey_pair[1]--;
// Specific key (subject to implementation details)
EXPECT_EQ(ckey_pair, PairU64(0, 2));
EXPECT_EQ(ckey_pair, PairU64({0, 2}));
Cache::Handle* handle = cache->Lookup(ckey.AsSlice());
EXPECT_NE(handle, nullptr)

View File

@ -3,7 +3,7 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/lru_secondary_cache.h"
#include "cache/compressed_secondary_cache.h"
#include <memory>
@ -22,7 +22,7 @@ void DeletionCallback(const Slice& /*key*/, void* obj) {
} // namespace
LRUSecondaryCache::LRUSecondaryCache(
CompressedSecondaryCache::CompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
@ -37,11 +37,13 @@ LRUSecondaryCache::LRUSecondaryCache(
use_adaptive_mutex, metadata_charge_policy);
}
LRUSecondaryCache::~LRUSecondaryCache() { cache_.reset(); }
CompressedSecondaryCache::~CompressedSecondaryCache() { cache_.reset(); }
std::unique_ptr<SecondaryCacheResultHandle> LRUSecondaryCache::Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/) {
std::unique_ptr<SecondaryCacheResultHandle> CompressedSecondaryCache::Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
bool& is_in_sec_cache) {
std::unique_ptr<SecondaryCacheResultHandle> handle;
is_in_sec_cache = false;
Cache::Handle* lru_handle = cache_->Lookup(key);
if (lru_handle == nullptr) {
return handle;
@ -69,23 +71,24 @@ std::unique_ptr<SecondaryCacheResultHandle> LRUSecondaryCache::Lookup(
cache_options_.memory_allocator.get());
if (!uncompressed) {
cache_->Release(lru_handle, true);
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
return handle;
}
s = create_cb(uncompressed.get(), uncompressed_size, &value, &charge);
}
if (!s.ok()) {
cache_->Release(lru_handle, true);
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
return handle;
}
handle.reset(new LRUSecondaryCacheResultHandle(value, charge));
cache_->Release(lru_handle);
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
handle.reset(new CompressedSecondaryCacheResultHandle(value, charge));
return handle;
}
Status LRUSecondaryCache::Insert(const Slice& key, void* value,
Status CompressedSecondaryCache::Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) {
size_t size = (*helper->size_cb)(value);
CacheAllocationPtr ptr =
@ -125,9 +128,9 @@ Status LRUSecondaryCache::Insert(const Slice& key, void* value,
return cache_->Insert(key, buf, size, DeletionCallback);
}
void LRUSecondaryCache::Erase(const Slice& key) { cache_->Erase(key); }
void CompressedSecondaryCache::Erase(const Slice& key) { cache_->Erase(key); }
std::string LRUSecondaryCache::GetPrintableOptions() const {
std::string CompressedSecondaryCache::GetPrintableOptions() const {
std::string ret;
ret.reserve(20000);
const int kBufferSize = 200;
@ -142,23 +145,23 @@ std::string LRUSecondaryCache::GetPrintableOptions() const {
return ret;
}
std::shared_ptr<SecondaryCache> NewLRUSecondaryCache(
std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
CompressionType compression_type, uint32_t compress_format_version) {
return std::make_shared<LRUSecondaryCache>(
return std::make_shared<CompressedSecondaryCache>(
capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
memory_allocator, use_adaptive_mutex, metadata_charge_policy,
compression_type, compress_format_version);
}
std::shared_ptr<SecondaryCache> NewLRUSecondaryCache(
const LRUSecondaryCacheOptions& opts) {
std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
const CompressedSecondaryCacheOptions& opts) {
// The secondary_cache is disabled for this LRUCache instance.
assert(opts.secondary_cache == nullptr);
return NewLRUSecondaryCache(
return NewCompressedSecondaryCache(
opts.capacity, opts.num_shard_bits, opts.strict_capacity_limit,
opts.high_pri_pool_ratio, opts.memory_allocator, opts.use_adaptive_mutex,
opts.metadata_charge_policy, opts.compression_type,

View File

@ -16,15 +16,16 @@
namespace ROCKSDB_NAMESPACE {
class LRUSecondaryCacheResultHandle : public SecondaryCacheResultHandle {
class CompressedSecondaryCacheResultHandle : public SecondaryCacheResultHandle {
public:
LRUSecondaryCacheResultHandle(void* value, size_t size)
CompressedSecondaryCacheResultHandle(void* value, size_t size)
: value_(value), size_(size) {}
virtual ~LRUSecondaryCacheResultHandle() override = default;
virtual ~CompressedSecondaryCacheResultHandle() override = default;
LRUSecondaryCacheResultHandle(const LRUSecondaryCacheResultHandle&) = delete;
LRUSecondaryCacheResultHandle& operator=(
const LRUSecondaryCacheResultHandle&) = delete;
CompressedSecondaryCacheResultHandle(
const CompressedSecondaryCacheResultHandle&) = delete;
CompressedSecondaryCacheResultHandle& operator=(
const CompressedSecondaryCacheResultHandle&) = delete;
bool IsReady() override { return true; }
@ -39,19 +40,19 @@ class LRUSecondaryCacheResultHandle : public SecondaryCacheResultHandle {
size_t size_;
};
// The LRUSecondaryCache is a concrete implementation of
// The CompressedSecondaryCache is a concrete implementation of
// rocksdb::SecondaryCache.
//
// Users can also cast a pointer to it and call methods on
// it directly, especially custom methods that may be added
// in the future. For example -
// std::unique_ptr<rocksdb::SecondaryCache> cache =
// NewLRUSecondaryCache(opts);
// static_cast<LRUSecondaryCache*>(cache.get())->Erase(key);
// NewCompressedSecondaryCache(opts);
// static_cast<CompressedSecondaryCache*>(cache.get())->Erase(key);
class LRUSecondaryCache : public SecondaryCache {
class CompressedSecondaryCache : public SecondaryCache {
public:
LRUSecondaryCache(
CompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
@ -60,16 +61,16 @@ class LRUSecondaryCache : public SecondaryCache {
kDontChargeCacheMetadata,
CompressionType compression_type = CompressionType::kLZ4Compression,
uint32_t compress_format_version = 2);
virtual ~LRUSecondaryCache() override;
virtual ~CompressedSecondaryCache() override;
const char* Name() const override { return "LRUSecondaryCache"; }
const char* Name() const override { return "CompressedSecondaryCache"; }
Status Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) override;
std::unique_ptr<SecondaryCacheResultHandle> Lookup(
const Slice& key, const Cache::CreateCallback& create_cb,
bool /*wait*/) override;
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
bool& is_in_sec_cache) override;
void Erase(const Slice& key) override;
@ -79,7 +80,7 @@ class LRUSecondaryCache : public SecondaryCache {
private:
std::shared_ptr<Cache> cache_;
LRUSecondaryCacheOptions cache_options_;
CompressedSecondaryCacheOptions cache_options_;
};
} // namespace ROCKSDB_NAMESPACE

View File

@ -3,7 +3,7 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/lru_secondary_cache.h"
#include "cache/compressed_secondary_cache.h"
#include <algorithm>
#include <cstdint>
@ -17,10 +17,10 @@
namespace ROCKSDB_NAMESPACE {
class LRUSecondaryCacheTest : public testing::Test {
class CompressedSecondaryCacheTest : public testing::Test {
public:
LRUSecondaryCacheTest() : fail_create_(false) {}
~LRUSecondaryCacheTest() {}
CompressedSecondaryCacheTest() : fail_create_(false) {}
~CompressedSecondaryCacheTest() {}
protected:
class TestItem {
@ -80,7 +80,7 @@ class LRUSecondaryCacheTest : public testing::Test {
void SetFailCreate(bool fail) { fail_create_ = fail; }
void BasicTest(bool sec_cache_is_compressed, bool use_jemalloc) {
LRUSecondaryCacheOptions opts;
CompressedSecondaryCacheOptions opts;
opts.capacity = 2048;
opts.num_shard_bits = 0;
opts.metadata_charge_policy = kDontChargeCacheMetadata;
@ -107,11 +107,13 @@ class LRUSecondaryCacheTest : public testing::Test {
ROCKSDB_GTEST_BYPASS("JEMALLOC not supported");
}
}
std::shared_ptr<SecondaryCache> cache = NewLRUSecondaryCache(opts);
std::shared_ptr<SecondaryCache> sec_cache =
NewCompressedSecondaryCache(opts);
bool is_in_sec_cache{true};
// Lookup an non-existent key.
std::unique_ptr<SecondaryCacheResultHandle> handle0 =
cache->Lookup("k0", test_item_creator, true);
sec_cache->Lookup("k0", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle0, nullptr);
Random rnd(301);
@ -119,51 +121,47 @@ class LRUSecondaryCacheTest : public testing::Test {
std::string str1;
test::CompressibleString(&rnd, 0.25, 1000, &str1);
TestItem item1(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", &item1, &LRUSecondaryCacheTest::helper_));
ASSERT_OK(sec_cache->Insert("k1", &item1,
&CompressedSecondaryCacheTest::helper_));
std::unique_ptr<SecondaryCacheResultHandle> handle1 =
cache->Lookup("k1", test_item_creator, true);
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle1, nullptr);
// delete reinterpret_cast<TestItem*>(handle1->Value());
ASSERT_FALSE(is_in_sec_cache);
std::unique_ptr<TestItem> val1 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle1->Value()));
ASSERT_NE(val1, nullptr);
ASSERT_EQ(memcmp(val1->Buf(), item1.Buf(), item1.Size()), 0);
// Lookup the first item again.
std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle1_1, nullptr);
// Insert and Lookup the second item.
std::string str2;
test::CompressibleString(&rnd, 0.5, 1000, &str2);
TestItem item2(str2.data(), str2.length());
ASSERT_OK(cache->Insert("k2", &item2, &LRUSecondaryCacheTest::helper_));
ASSERT_OK(sec_cache->Insert("k2", &item2,
&CompressedSecondaryCacheTest::helper_));
std::unique_ptr<SecondaryCacheResultHandle> handle2 =
cache->Lookup("k2", test_item_creator, true);
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle2, nullptr);
std::unique_ptr<TestItem> val2 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle2->Value()));
ASSERT_NE(val2, nullptr);
ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0);
// Lookup the first item again to make sure it is still in the cache.
std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
cache->Lookup("k1", test_item_creator, true);
ASSERT_NE(handle1_1, nullptr);
std::unique_ptr<TestItem> val1_1 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle1_1->Value()));
ASSERT_NE(val1_1, nullptr);
ASSERT_EQ(memcmp(val1_1->Buf(), item1.Buf(), item1.Size()), 0);
std::vector<SecondaryCacheResultHandle*> handles = {handle1.get(),
handle2.get()};
cache->WaitAll(handles);
sec_cache->WaitAll(handles);
cache->Erase("k1");
handle1 = cache->Lookup("k1", test_item_creator, true);
ASSERT_EQ(handle1, nullptr);
cache.reset();
sec_cache.reset();
}
void FailsTest(bool sec_cache_is_compressed) {
LRUSecondaryCacheOptions secondary_cache_opts;
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
@ -176,32 +174,28 @@ class LRUSecondaryCacheTest : public testing::Test {
secondary_cache_opts.capacity = 1100;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> cache =
NewLRUSecondaryCache(secondary_cache_opts);
std::shared_ptr<SecondaryCache> sec_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
// Insert and Lookup the first item.
Random rnd(301);
std::string str1(rnd.RandomString(1000));
TestItem item1(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", &item1, &LRUSecondaryCacheTest::helper_));
std::unique_ptr<SecondaryCacheResultHandle> handle1 =
cache->Lookup("k1", test_item_creator, true);
ASSERT_NE(handle1, nullptr);
std::unique_ptr<TestItem> val1 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle1->Value()));
ASSERT_NE(val1, nullptr);
ASSERT_EQ(memcmp(val1->Buf(), item1.Buf(), item1.Size()), 0);
ASSERT_OK(sec_cache->Insert("k1", &item1,
&CompressedSecondaryCacheTest::helper_));
// Insert and Lookup the second item.
std::string str2(rnd.RandomString(200));
TestItem item2(str2.data(), str2.length());
// k1 is evicted.
ASSERT_OK(cache->Insert("k2", &item2, &LRUSecondaryCacheTest::helper_));
ASSERT_OK(sec_cache->Insert("k2", &item2,
&CompressedSecondaryCacheTest::helper_));
bool is_in_sec_cache{false};
std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
cache->Lookup("k1", test_item_creator, true);
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle1_1, nullptr);
std::unique_ptr<SecondaryCacheResultHandle> handle2 =
cache->Lookup("k2", test_item_creator, true);
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle2, nullptr);
std::unique_ptr<TestItem> val2 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle2->Value()));
@ -211,20 +205,20 @@ class LRUSecondaryCacheTest : public testing::Test {
// Create Fails.
SetFailCreate(true);
std::unique_ptr<SecondaryCacheResultHandle> handle2_1 =
cache->Lookup("k2", test_item_creator, true);
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle2_1, nullptr);
// Save Fails.
std::string str3 = rnd.RandomString(10);
TestItem item3(str3.data(), str3.length());
ASSERT_NOK(
cache->Insert("k3", &item3, &LRUSecondaryCacheTest::helper_fail_));
ASSERT_NOK(sec_cache->Insert("k3", &item3,
&CompressedSecondaryCacheTest::helper_fail_));
cache.reset();
sec_cache.reset();
}
void BasicIntegrationTest(bool sec_cache_is_compressed) {
LRUSecondaryCacheOptions secondary_cache_opts;
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
@ -239,7 +233,7 @@ class LRUSecondaryCacheTest : public testing::Test {
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewLRUSecondaryCache(secondary_cache_opts);
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions lru_cache_opts(1024, 0, false, 0.5, nullptr,
kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
@ -252,26 +246,26 @@ class LRUSecondaryCacheTest : public testing::Test {
std::string str1 = rnd.RandomString(1010);
std::string str1_clone{str1};
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// After Insert, lru cache contains k2 and secondary cache contains k1.
ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
std::string str3 = rnd.RandomString(1020);
TestItem* item3 = new TestItem(str3.data(), str3.length());
// After Insert, lru cache contains k3 and secondary cache contains k1 and
// k2
ASSERT_OK(cache->Insert("k3", item3, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k3", item3, &CompressedSecondaryCacheTest::helper_,
str3.length()));
Cache::Handle* handle;
handle =
cache->Lookup("k3", &LRUSecondaryCacheTest::helper_, test_item_creator,
Cache::Priority::LOW, true, stats.get());
handle = cache->Lookup("k3", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
TestItem* val3 = static_cast<TestItem*>(cache->Value(handle));
ASSERT_NE(val3, nullptr);
@ -279,34 +273,35 @@ class LRUSecondaryCacheTest : public testing::Test {
cache->Release(handle);
// Lookup an non-existent key.
handle =
cache->Lookup("k0", &LRUSecondaryCacheTest::helper_, test_item_creator,
Cache::Priority::LOW, true, stats.get());
handle = cache->Lookup("k0", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_EQ(handle, nullptr);
// This Lookup should promote k1 and demote k3, so k2 is evicted from the
// secondary cache. The lru cache contains k1 and secondary cache contains
// k3. item1 was Free(), so it cannot be compared against the item1.
handle =
cache->Lookup("k1", &LRUSecondaryCacheTest::helper_, test_item_creator,
Cache::Priority::LOW, true, stats.get());
// This Lookup should promote k1 and erase k1 from the secondary cache,
// then k3 is demoted. So k2 and k3 are in the secondary cache.
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
TestItem* val1_1 = static_cast<TestItem*>(cache->Value(handle));
ASSERT_NE(val1_1, nullptr);
ASSERT_EQ(memcmp(val1_1->Buf(), str1_clone.data(), str1_clone.size()), 0);
cache->Release(handle);
handle =
cache->Lookup("k2", &LRUSecondaryCacheTest::helper_, test_item_creator,
Cache::Priority::LOW, true, stats.get());
ASSERT_EQ(handle, nullptr);
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void BasicIntegrationFailTest(bool sec_cache_is_compressed) {
LRUSecondaryCacheOptions secondary_cache_opts;
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
@ -321,7 +316,7 @@ class LRUSecondaryCacheTest : public testing::Test {
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewLRUSecondaryCache(secondary_cache_opts);
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
@ -333,7 +328,8 @@ class LRUSecondaryCacheTest : public testing::Test {
auto item1 =
std::unique_ptr<TestItem>(new TestItem(str1.data(), str1.length()));
ASSERT_NOK(cache->Insert("k1", item1.get(), nullptr, str1.length()));
ASSERT_OK(cache->Insert("k1", item1.get(), &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k1", item1.get(),
&CompressedSecondaryCacheTest::helper_,
str1.length()));
item1.release(); // Appease clang-analyze "potential memory leak"
@ -341,7 +337,7 @@ class LRUSecondaryCacheTest : public testing::Test {
handle = cache->Lookup("k2", nullptr, test_item_creator,
Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_,
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, false);
ASSERT_EQ(handle, nullptr);
@ -350,7 +346,7 @@ class LRUSecondaryCacheTest : public testing::Test {
}
void IntegrationSaveFailTest(bool sec_cache_is_compressed) {
LRUSecondaryCacheOptions secondary_cache_opts;
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
@ -366,7 +362,7 @@ class LRUSecondaryCacheTest : public testing::Test {
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewLRUSecondaryCache(secondary_cache_opts);
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
@ -376,25 +372,27 @@ class LRUSecondaryCacheTest : public testing::Test {
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_fail_,
ASSERT_OK(cache->Insert("k1", item1,
&CompressedSecondaryCacheTest::helper_fail_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_fail_,
ASSERT_OK(cache->Insert("k2", item2,
&CompressedSecondaryCacheTest::helper_fail_,
str2.length()));
Cache::Handle* handle;
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_fail_,
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 demotion would have failed
handle = cache->Lookup("k1", &LRUSecondaryCacheTest::helper_fail_,
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_fail_,
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
@ -404,7 +402,7 @@ class LRUSecondaryCacheTest : public testing::Test {
}
void IntegrationCreateFailTest(bool sec_cache_is_compressed) {
LRUSecondaryCacheOptions secondary_cache_opts;
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
@ -420,7 +418,7 @@ class LRUSecondaryCacheTest : public testing::Test {
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewLRUSecondaryCache(secondary_cache_opts);
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
@ -430,27 +428,27 @@ class LRUSecondaryCacheTest : public testing::Test {
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
Cache::Handle* handle;
SetFailCreate(true);
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_,
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 creation would have failed
handle = cache->Lookup("k1", &LRUSecondaryCacheTest::helper_,
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_,
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
@ -460,7 +458,7 @@ class LRUSecondaryCacheTest : public testing::Test {
}
void IntegrationFullCapacityTest(bool sec_cache_is_compressed) {
LRUSecondaryCacheOptions secondary_cache_opts;
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
@ -476,7 +474,7 @@ class LRUSecondaryCacheTest : public testing::Test {
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewLRUSecondaryCache(secondary_cache_opts);
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, /*_strict_capacity_limit=*/true, 0.5, nullptr,
kDefaultToAdaptiveMutex, kDontChargeCacheMetadata);
@ -486,31 +484,32 @@ class LRUSecondaryCacheTest : public testing::Test {
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
Cache::Handle* handle;
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
// k1 promotion should fail due to the block cache being at capacity,
// but the lookup should still succeed
Cache::Handle* handle2;
handle2 = cache->Lookup("k1", &LRUSecondaryCacheTest::helper_,
handle2 = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle2, nullptr);
// Since k1 didn't get inserted, k2 should still be in cache
cache->Release(handle);
cache->Release(handle2);
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_,
// k1 promotion should fail due to the block cache being at capacity,
// but the lookup should still succeed
Cache::Handle* handle1;
handle1 = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
ASSERT_NE(handle1, nullptr);
cache->Release(handle1);
// Since k1 didn't get inserted, k2 should still be in cache
handle2 = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle2, nullptr);
cache->Release(handle2);
cache.reset();
secondary_cache.reset();
@ -520,72 +519,83 @@ class LRUSecondaryCacheTest : public testing::Test {
bool fail_create_;
};
Cache::CacheItemHelper LRUSecondaryCacheTest::helper_(
LRUSecondaryCacheTest::SizeCallback, LRUSecondaryCacheTest::SaveToCallback,
LRUSecondaryCacheTest::DeletionCallback);
Cache::CacheItemHelper CompressedSecondaryCacheTest::helper_(
CompressedSecondaryCacheTest::SizeCallback,
CompressedSecondaryCacheTest::SaveToCallback,
CompressedSecondaryCacheTest::DeletionCallback);
Cache::CacheItemHelper LRUSecondaryCacheTest::helper_fail_(
LRUSecondaryCacheTest::SizeCallback,
LRUSecondaryCacheTest::SaveToCallbackFail,
LRUSecondaryCacheTest::DeletionCallback);
Cache::CacheItemHelper CompressedSecondaryCacheTest::helper_fail_(
CompressedSecondaryCacheTest::SizeCallback,
CompressedSecondaryCacheTest::SaveToCallbackFail,
CompressedSecondaryCacheTest::DeletionCallback);
TEST_F(LRUSecondaryCacheTest, BasicTestWithNoCompression) {
TEST_F(CompressedSecondaryCacheTest, BasicTestWithNoCompression) {
BasicTest(false, false);
}
TEST_F(LRUSecondaryCacheTest, BasicTestWithMemoryAllocatorAndNoCompression) {
TEST_F(CompressedSecondaryCacheTest,
BasicTestWithMemoryAllocatorAndNoCompression) {
BasicTest(false, true);
}
TEST_F(LRUSecondaryCacheTest, BasicTestWithCompression) {
TEST_F(CompressedSecondaryCacheTest, BasicTestWithCompression) {
BasicTest(true, false);
}
TEST_F(LRUSecondaryCacheTest, BasicTestWithMemoryAllocatorAndCompression) {
TEST_F(CompressedSecondaryCacheTest,
BasicTestWithMemoryAllocatorAndCompression) {
BasicTest(true, true);
}
TEST_F(LRUSecondaryCacheTest, FailsTestWithNoCompression) { FailsTest(false); }
TEST_F(CompressedSecondaryCacheTest, FailsTestWithNoCompression) {
FailsTest(false);
}
TEST_F(LRUSecondaryCacheTest, FailsTestWithCompression) { FailsTest(true); }
TEST_F(CompressedSecondaryCacheTest, FailsTestWithCompression) {
FailsTest(true);
}
TEST_F(LRUSecondaryCacheTest, BasicIntegrationTestWithNoCompression) {
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationTestWithNoCompression) {
BasicIntegrationTest(false);
}
TEST_F(LRUSecondaryCacheTest, BasicIntegrationTestWithCompression) {
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationTestWithCompression) {
BasicIntegrationTest(true);
}
TEST_F(LRUSecondaryCacheTest, BasicIntegrationFailTestWithNoCompression) {
TEST_F(CompressedSecondaryCacheTest,
BasicIntegrationFailTestWithNoCompression) {
BasicIntegrationFailTest(false);
}
TEST_F(LRUSecondaryCacheTest, BasicIntegrationFailTestWithCompression) {
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationFailTestWithCompression) {
BasicIntegrationFailTest(true);
}
TEST_F(LRUSecondaryCacheTest, IntegrationSaveFailTestWithNoCompression) {
TEST_F(CompressedSecondaryCacheTest, IntegrationSaveFailTestWithNoCompression) {
IntegrationSaveFailTest(false);
}
TEST_F(LRUSecondaryCacheTest, IntegrationSaveFailTestWithCompression) {
TEST_F(CompressedSecondaryCacheTest, IntegrationSaveFailTestWithCompression) {
IntegrationSaveFailTest(true);
}
TEST_F(LRUSecondaryCacheTest, IntegrationCreateFailTestWithNoCompression) {
TEST_F(CompressedSecondaryCacheTest,
IntegrationCreateFailTestWithNoCompression) {
IntegrationCreateFailTest(false);
}
TEST_F(LRUSecondaryCacheTest, IntegrationCreateFailTestWithCompression) {
TEST_F(CompressedSecondaryCacheTest, IntegrationCreateFailTestWithCompression) {
IntegrationCreateFailTest(true);
}
TEST_F(LRUSecondaryCacheTest, IntegrationFullCapacityTestWithNoCompression) {
TEST_F(CompressedSecondaryCacheTest,
IntegrationFullCapacityTestWithNoCompression) {
IntegrationFullCapacityTest(false);
}
TEST_F(LRUSecondaryCacheTest, IntegrationFullCapacityTestWithCompression) {
TEST_F(CompressedSecondaryCacheTest,
IntegrationFullCapacityTestWithCompression) {
IntegrationFullCapacityTest(true);
}

10
cache/lru_cache.cc vendored
View File

@ -298,7 +298,7 @@ void LRUCacheShard::SetCapacity(size_t capacity) {
// Free the entries outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
if (secondary_cache_ && entry->IsSecondaryCacheCompatible() &&
!entry->IsPromoted()) {
!entry->IsInSecondaryCache()) {
secondary_cache_->Insert(entry->key(), entry->value, entry->info_.helper)
.PermitUncheckedError();
}
@ -373,7 +373,7 @@ Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
if (secondary_cache_ && entry->IsSecondaryCacheCompatible() &&
!entry->IsPromoted()) {
!entry->IsInSecondaryCache()) {
secondary_cache_->Insert(entry->key(), entry->value, entry->info_.helper)
.PermitUncheckedError();
}
@ -389,7 +389,6 @@ void LRUCacheShard::Promote(LRUHandle* e) {
assert(secondary_handle->IsReady());
e->SetIncomplete(false);
e->SetInCache(true);
e->SetPromoted(true);
e->value = secondary_handle->Value();
e->charge = secondary_handle->Size();
delete secondary_handle;
@ -446,8 +445,9 @@ Cache::Handle* LRUCacheShard::Lookup(
// accounting purposes, which we won't demote to the secondary cache
// anyway.
assert(create_cb && helper->del_cb);
bool is_in_sec_cache{false};
std::unique_ptr<SecondaryCacheResultHandle> secondary_handle =
secondary_cache_->Lookup(key, create_cb, wait);
secondary_cache_->Lookup(key, create_cb, wait, is_in_sec_cache);
if (secondary_handle != nullptr) {
e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
@ -467,6 +467,7 @@ Cache::Handle* LRUCacheShard::Lookup(
if (wait) {
Promote(e);
e->SetIsInSecondaryCache(is_in_sec_cache);
if (!e->value) {
// The secondary cache returned a handle, but the lookup failed.
e->Unref();
@ -480,6 +481,7 @@ Cache::Handle* LRUCacheShard::Lookup(
// If wait is false, we always return a handle and let the caller
// release the handle after checking for success or failure.
e->SetIncomplete(true);
e->SetIsInSecondaryCache(is_in_sec_cache);
// This may be slightly inaccurate, if the lookup eventually fails.
// But the probability is very low.
PERF_COUNTER_ADD(secondary_cache_hit_count, 1);

19
cache/lru_cache.h vendored
View File

@ -85,8 +85,8 @@ struct LRUHandle {
IS_SECONDARY_CACHE_COMPATIBLE = (1 << 4),
// Is the handle still being read from a lower tier.
IS_PENDING = (1 << 5),
// Has the item been promoted from a lower tier.
IS_PROMOTED = (1 << 6),
// Whether this handle is still in a lower tier
IS_IN_SECONDARY_CACHE = (1 << 6),
};
uint8_t flags;
@ -129,7 +129,7 @@ struct LRUHandle {
#endif // __SANITIZE_THREAD__
}
bool IsPending() const { return flags & IS_PENDING; }
bool IsPromoted() const { return flags & IS_PROMOTED; }
bool IsInSecondaryCache() const { return flags & IS_IN_SECONDARY_CACHE; }
void SetInCache(bool in_cache) {
if (in_cache) {
@ -176,11 +176,11 @@ struct LRUHandle {
}
}
void SetPromoted(bool promoted) {
if (promoted) {
flags |= IS_PROMOTED;
void SetIsInSecondaryCache(bool is_in_secondary_cache) {
if (is_in_secondary_cache) {
flags |= IS_IN_SECONDARY_CACHE;
} else {
flags &= ~IS_PROMOTED;
flags &= ~IS_IN_SECONDARY_CACHE;
}
}
@ -371,8 +371,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
DeleterFn deleter, const Cache::CacheItemHelper* helper,
Cache::Handle** handle, Cache::Priority priority);
// Promote an item looked up from the secondary cache to the LRU cache. The
// item is only inserted into the hash table and not the LRU list, and only
// Promote an item looked up from the secondary cache to the LRU cache.
// The item may be still in the secondary cache.
// It is only inserted into the hash table and not the LRU list, and only
// if the cache is not at full capacity, as is the case during Insert. The
// caller should hold a reference on the LRUHandle. When the caller releases
// the last reference, the item is added to the LRU list.

View File

@ -266,12 +266,13 @@ class TestSecondaryCache : public SecondaryCache {
}
std::unique_ptr<SecondaryCacheResultHandle> Lookup(
const Slice& key, const Cache::CreateCallback& create_cb,
bool /*wait*/) override {
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
bool& is_in_sec_cache) override {
std::string key_str = key.ToString();
TEST_SYNC_POINT_CALLBACK("TestSecondaryCache::Lookup", &key_str);
std::unique_ptr<SecondaryCacheResultHandle> secondary_handle;
is_in_sec_cache = false;
ResultType type = ResultType::SUCCESS;
auto iter = result_map_.find(key.ToString());
if (iter != result_map_.end()) {
@ -296,6 +297,7 @@ class TestSecondaryCache : public SecondaryCache {
if (s.ok()) {
secondary_handle.reset(new TestSecondaryCacheResultHandle(
cache_.get(), handle, value, charge, type));
is_in_sec_cache = true;
} else {
cache_->Release(handle);
}
@ -383,10 +385,10 @@ class DBSecondaryCacheTest : public DBTestBase {
std::unique_ptr<Env> fault_env_;
};
class LRUSecondaryCacheTest : public LRUCacheTest {
class LRUCacheSecondaryCacheTest : public LRUCacheTest {
public:
LRUSecondaryCacheTest() : fail_create_(false) {}
~LRUSecondaryCacheTest() {}
LRUCacheSecondaryCacheTest() : fail_create_(false) {}
~LRUCacheSecondaryCacheTest() {}
protected:
class TestItem {
@ -449,16 +451,17 @@ class LRUSecondaryCacheTest : public LRUCacheTest {
bool fail_create_;
};
Cache::CacheItemHelper LRUSecondaryCacheTest::helper_(
LRUSecondaryCacheTest::SizeCallback, LRUSecondaryCacheTest::SaveToCallback,
LRUSecondaryCacheTest::DeletionCallback);
Cache::CacheItemHelper LRUCacheSecondaryCacheTest::helper_(
LRUCacheSecondaryCacheTest::SizeCallback,
LRUCacheSecondaryCacheTest::SaveToCallback,
LRUCacheSecondaryCacheTest::DeletionCallback);
Cache::CacheItemHelper LRUSecondaryCacheTest::helper_fail_(
LRUSecondaryCacheTest::SizeCallback,
LRUSecondaryCacheTest::SaveToCallbackFail,
LRUSecondaryCacheTest::DeletionCallback);
Cache::CacheItemHelper LRUCacheSecondaryCacheTest::helper_fail_(
LRUCacheSecondaryCacheTest::SizeCallback,
LRUCacheSecondaryCacheTest::SaveToCallbackFail,
LRUCacheSecondaryCacheTest::DeletionCallback);
TEST_F(LRUSecondaryCacheTest, BasicTest) {
TEST_F(LRUCacheSecondaryCacheTest, BasicTest) {
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
std::shared_ptr<TestSecondaryCache> secondary_cache =
@ -470,25 +473,25 @@ TEST_F(LRUSecondaryCacheTest, BasicTest) {
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k1", item1, &LRUCacheSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to NVM
ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k2", item2, &LRUCacheSecondaryCacheTest::helper_,
str2.length()));
get_perf_context()->Reset();
Cache::Handle* handle;
handle =
cache->Lookup("k2", &LRUSecondaryCacheTest::helper_, test_item_creator,
Cache::Priority::LOW, true, stats.get());
cache->Lookup("k2", &LRUCacheSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true, stats.get());
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should promote k1 and demote k2
handle =
cache->Lookup("k1", &LRUSecondaryCacheTest::helper_, test_item_creator,
Cache::Priority::LOW, true, stats.get());
cache->Lookup("k1", &LRUCacheSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true, stats.get());
ASSERT_NE(handle, nullptr);
cache->Release(handle);
ASSERT_EQ(secondary_cache->num_inserts(), 2u);
@ -502,7 +505,7 @@ TEST_F(LRUSecondaryCacheTest, BasicTest) {
secondary_cache.reset();
}
TEST_F(LRUSecondaryCacheTest, BasicFailTest) {
TEST_F(LRUCacheSecondaryCacheTest, BasicFailTest) {
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
std::shared_ptr<TestSecondaryCache> secondary_cache =
@ -515,15 +518,15 @@ TEST_F(LRUSecondaryCacheTest, BasicFailTest) {
auto item1 = std::make_unique<TestItem>(str1.data(), str1.length());
ASSERT_TRUE(cache->Insert("k1", item1.get(), nullptr, str1.length())
.IsInvalidArgument());
ASSERT_OK(cache->Insert("k1", item1.get(), &LRUSecondaryCacheTest::helper_,
str1.length()));
ASSERT_OK(cache->Insert("k1", item1.get(),
&LRUCacheSecondaryCacheTest::helper_, str1.length()));
item1.release(); // Appease clang-analyze "potential memory leak"
Cache::Handle* handle;
handle = cache->Lookup("k2", nullptr, test_item_creator, Cache::Priority::LOW,
true);
ASSERT_EQ(handle, nullptr);
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_,
handle = cache->Lookup("k2", &LRUCacheSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, false);
ASSERT_EQ(handle, nullptr);
@ -531,7 +534,7 @@ TEST_F(LRUSecondaryCacheTest, BasicFailTest) {
secondary_cache.reset();
}
TEST_F(LRUSecondaryCacheTest, SaveFailTest) {
TEST_F(LRUCacheSecondaryCacheTest, SaveFailTest) {
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
std::shared_ptr<TestSecondaryCache> secondary_cache =
@ -542,25 +545,25 @@ TEST_F(LRUSecondaryCacheTest, SaveFailTest) {
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_fail_,
str1.length()));
ASSERT_OK(cache->Insert(
"k1", item1, &LRUCacheSecondaryCacheTest::helper_fail_, str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to NVM
ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_fail_,
str2.length()));
ASSERT_OK(cache->Insert(
"k2", item2, &LRUCacheSecondaryCacheTest::helper_fail_, str2.length()));
Cache::Handle* handle;
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_fail_,
handle = cache->Lookup("k2", &LRUCacheSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 demotion would have failed
handle = cache->Lookup("k1", &LRUSecondaryCacheTest::helper_fail_,
handle = cache->Lookup("k1", &LRUCacheSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_fail_,
handle = cache->Lookup("k2", &LRUCacheSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
@ -571,7 +574,7 @@ TEST_F(LRUSecondaryCacheTest, SaveFailTest) {
secondary_cache.reset();
}
TEST_F(LRUSecondaryCacheTest, CreateFailTest) {
TEST_F(LRUCacheSecondaryCacheTest, CreateFailTest) {
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
std::shared_ptr<TestSecondaryCache> secondary_cache =
@ -582,26 +585,26 @@ TEST_F(LRUSecondaryCacheTest, CreateFailTest) {
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k1", item1, &LRUCacheSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to NVM
ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k2", item2, &LRUCacheSecondaryCacheTest::helper_,
str2.length()));
Cache::Handle* handle;
SetFailCreate(true);
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_,
handle = cache->Lookup("k2", &LRUCacheSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 creation would have failed
handle = cache->Lookup("k1", &LRUSecondaryCacheTest::helper_,
handle = cache->Lookup("k1", &LRUCacheSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_,
handle = cache->Lookup("k2", &LRUCacheSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
@ -612,7 +615,7 @@ TEST_F(LRUSecondaryCacheTest, CreateFailTest) {
secondary_cache.reset();
}
TEST_F(LRUSecondaryCacheTest, FullCapacityTest) {
TEST_F(LRUCacheSecondaryCacheTest, FullCapacityTest) {
LRUCacheOptions opts(1024, 0, /*_strict_capacity_limit=*/true, 0.5, nullptr,
kDefaultToAdaptiveMutex, kDontChargeCacheMetadata);
std::shared_ptr<TestSecondaryCache> secondary_cache =
@ -623,28 +626,28 @@ TEST_F(LRUSecondaryCacheTest, FullCapacityTest) {
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k1", item1, &LRUCacheSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to NVM
ASSERT_OK(cache->Insert("k2", item2, &LRUSecondaryCacheTest::helper_,
ASSERT_OK(cache->Insert("k2", item2, &LRUCacheSecondaryCacheTest::helper_,
str2.length()));
Cache::Handle* handle;
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_,
handle = cache->Lookup("k2", &LRUCacheSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
// k1 promotion should fail due to the block cache being at capacity,
// but the lookup should still succeed
Cache::Handle* handle2;
handle2 = cache->Lookup("k1", &LRUSecondaryCacheTest::helper_,
handle2 = cache->Lookup("k1", &LRUCacheSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle2, nullptr);
// Since k1 didn't get inserted, k2 should still be in cache
cache->Release(handle);
cache->Release(handle2);
handle = cache->Lookup("k2", &LRUSecondaryCacheTest::helper_,
handle = cache->Lookup("k2", &LRUCacheSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
@ -1046,7 +1049,7 @@ TEST_F(DBSecondaryCacheTest, SecondaryCacheFailureTest) {
Destroy(options);
}
TEST_F(LRUSecondaryCacheTest, BasicWaitAllTest) {
TEST_F(LRUCacheSecondaryCacheTest, BasicWaitAllTest) {
LRUCacheOptions opts(1024, 2, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
std::shared_ptr<TestSecondaryCache> secondary_cache =
@ -1062,7 +1065,8 @@ TEST_F(LRUSecondaryCacheTest, BasicWaitAllTest) {
values.emplace_back(str);
TestItem* item = new TestItem(str.data(), str.length());
ASSERT_OK(cache->Insert("k" + std::to_string(i), item,
&LRUSecondaryCacheTest::helper_, str.length()));
&LRUCacheSecondaryCacheTest::helper_,
str.length()));
}
// Force all entries to be evicted to the secondary cache
cache->SetCapacity(0);
@ -1075,8 +1079,8 @@ TEST_F(LRUSecondaryCacheTest, BasicWaitAllTest) {
{"k5", TestSecondaryCache::ResultType::FAIL}});
std::vector<Cache::Handle*> results;
for (int i = 0; i < 6; ++i) {
results.emplace_back(
cache->Lookup("k" + std::to_string(i), &LRUSecondaryCacheTest::helper_,
results.emplace_back(cache->Lookup(
"k" + std::to_string(i), &LRUCacheSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, false));
}
cache->WaitAll(results);

View File

@ -366,19 +366,26 @@ TEST_F(DBBlobBasicTest, GetBlob_CorruptIndex) {
Reopen(options);
constexpr char key[] = "key";
constexpr char blob[] = "blob";
// Fake a corrupt blob index.
const std::string blob_index("foobar");
WriteBatch batch;
ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
ASSERT_OK(db_->Write(WriteOptions(), &batch));
ASSERT_OK(Put(key, blob));
ASSERT_OK(Flush());
SyncPoint::GetInstance()->SetCallBack(
"Version::Get::TamperWithBlobIndex", [](void* arg) {
Slice* const blob_index = static_cast<Slice*>(arg);
assert(blob_index);
assert(!blob_index->empty());
blob_index->remove_prefix(1);
});
SyncPoint::GetInstance()->EnableProcessing();
PinnableSlice result;
ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
.IsCorruption());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
TEST_F(DBBlobBasicTest, MultiGetBlob_CorruptIndex) {
@ -401,17 +408,27 @@ TEST_F(DBBlobBasicTest, MultiGetBlob_CorruptIndex) {
}
constexpr char key[] = "key";
{
// Fake a corrupt blob index.
const std::string blob_index("foobar");
WriteBatch batch;
ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
ASSERT_OK(db_->Write(WriteOptions(), &batch));
keys[kNumOfKeys] = Slice(static_cast<const char*>(key), sizeof(key) - 1);
}
constexpr char blob[] = "blob";
ASSERT_OK(Put(key, blob));
keys[kNumOfKeys] = key;
ASSERT_OK(Flush());
SyncPoint::GetInstance()->SetCallBack(
"Version::MultiGet::TamperWithBlobIndex", [&key](void* arg) {
KeyContext* const key_context = static_cast<KeyContext*>(arg);
assert(key_context);
assert(key_context->key);
if (*(key_context->key) == key) {
Slice* const blob_index = key_context->value;
assert(blob_index);
assert(!blob_index->empty());
blob_index->remove_prefix(1);
}
});
SyncPoint::GetInstance()->EnableProcessing();
std::array<PinnableSlice, kNumOfKeys + 1> values;
std::array<Status, kNumOfKeys + 1> statuses;
db_->MultiGet(ReadOptions(), dbfull()->DefaultColumnFamily(), kNumOfKeys + 1,
@ -425,6 +442,9 @@ TEST_F(DBBlobBasicTest, MultiGetBlob_CorruptIndex) {
ASSERT_TRUE(statuses[i].IsCorruption());
}
}
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
TEST_F(DBBlobBasicTest, MultiGetBlob_ExceedSoftLimit) {
@ -733,6 +753,14 @@ TEST_F(DBBlobBasicTest, Properties) {
&live_blob_file_size));
ASSERT_EQ(live_blob_file_size, total_expected_size);
// Total amount of garbage in live blob files
{
uint64_t live_blob_file_garbage_size = 0;
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileGarbageSize,
&live_blob_file_garbage_size));
ASSERT_EQ(live_blob_file_garbage_size, 0);
}
// Total size of all blob files across all versions
// Note: this should be the same as above since we only have one
// version at this point.
@ -768,6 +796,14 @@ TEST_F(DBBlobBasicTest, Properties) {
<< "\nBlob file space amplification: " << expected_space_amp << '\n';
ASSERT_EQ(blob_stats, oss.str());
// Total amount of garbage in live blob files
{
uint64_t live_blob_file_garbage_size = 0;
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileGarbageSize,
&live_blob_file_garbage_size));
ASSERT_EQ(live_blob_file_garbage_size, expected_garbage_size);
}
}
TEST_F(DBBlobBasicTest, PropertiesMultiVersion) {

View File

@ -415,16 +415,30 @@ TEST_F(DBBlobCompactionTest, CorruptedBlobIndex) {
new ValueMutationFilter(""));
options.compaction_filter = compaction_filter_guard.get();
DestroyAndReopen(options);
// Mock a corrupted blob index
constexpr char key[] = "key";
std::string blob_idx("blob_idx");
WriteBatch write_batch;
ASSERT_OK(WriteBatchInternal::PutBlobIndex(&write_batch, 0, key, blob_idx));
ASSERT_OK(db_->Write(WriteOptions(), &write_batch));
constexpr char blob[] = "blob";
ASSERT_OK(Put(key, blob));
ASSERT_OK(Flush());
SyncPoint::GetInstance()->SetCallBack(
"CompactionIterator::InvokeFilterIfNeeded::TamperWithBlobIndex",
[](void* arg) {
Slice* const blob_index = static_cast<Slice*>(arg);
assert(blob_index);
assert(!blob_index->empty());
blob_index->remove_prefix(1);
});
SyncPoint::GetInstance()->EnableProcessing();
ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
/*end=*/nullptr)
.IsCorruption());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
Close();
}

View File

@ -13,6 +13,7 @@
#include <vector>
#include "db/arena_wrapped_db_iter.h"
#include "db/blob/blob_index.h"
#include "db/column_family.h"
#include "db/db_iter.h"
#include "db/db_test_util.h"
@ -138,20 +139,39 @@ class DBBlobIndexTest : public DBTestBase {
}
};
// Should be able to write kTypeBlobIndex to memtables and SST files.
// Note: the following test case pertains to the StackableDB-based BlobDB
// implementation. We should be able to write kTypeBlobIndex to memtables and
// SST files.
TEST_F(DBBlobIndexTest, Write) {
for (auto tier : kAllTiers) {
DestroyAndReopen(GetTestOptions());
for (int i = 1; i <= 5; i++) {
std::string index = ToString(i);
std::vector<std::pair<std::string, std::string>> key_values;
constexpr size_t num_key_values = 5;
key_values.reserve(num_key_values);
for (size_t i = 1; i <= num_key_values; ++i) {
std::string key = "key" + ToString(i);
std::string blob_index;
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210,
"blob" + ToString(i));
key_values.emplace_back(std::move(key), std::move(blob_index));
}
for (const auto& key_value : key_values) {
WriteBatch batch;
ASSERT_OK(PutBlobIndex(&batch, "key" + index, "blob" + index));
ASSERT_OK(PutBlobIndex(&batch, key_value.first, key_value.second));
ASSERT_OK(Write(&batch));
}
MoveDataTo(tier);
for (int i = 1; i <= 5; i++) {
std::string index = ToString(i);
ASSERT_EQ("blob" + index, GetBlobIndex("key" + index));
for (const auto& key_value : key_values) {
ASSERT_EQ(GetBlobIndex(key_value.first), key_value.second);
}
}
}
@ -164,13 +184,19 @@ TEST_F(DBBlobIndexTest, Write) {
// accidentally opening the base DB of a stacked BlobDB and actual corruption
// when using the integrated BlobDB.
TEST_F(DBBlobIndexTest, Get) {
std::string blob_index;
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210, "blob");
for (auto tier : kAllTiers) {
DestroyAndReopen(GetTestOptions());
WriteBatch batch;
ASSERT_OK(batch.Put("key", "value"));
ASSERT_OK(PutBlobIndex(&batch, "blob_key", "blob_index"));
ASSERT_OK(PutBlobIndex(&batch, "blob_key", blob_index));
ASSERT_OK(Write(&batch));
MoveDataTo(tier);
// Verify normal value
bool is_blob_index = false;
PinnableSlice value;
@ -178,6 +204,7 @@ TEST_F(DBBlobIndexTest, Get) {
ASSERT_EQ("value", GetImpl("key"));
ASSERT_EQ("value", GetImpl("key", &is_blob_index));
ASSERT_FALSE(is_blob_index);
// Verify blob index
if (tier <= kImmutableMemtables) {
ASSERT_TRUE(Get("blob_key", &value).IsNotSupported());
@ -186,7 +213,7 @@ TEST_F(DBBlobIndexTest, Get) {
ASSERT_TRUE(Get("blob_key", &value).IsCorruption());
ASSERT_EQ("CORRUPTION", GetImpl("blob_key"));
}
ASSERT_EQ("blob_index", GetImpl("blob_key", &is_blob_index));
ASSERT_EQ(blob_index, GetImpl("blob_key", &is_blob_index));
ASSERT_TRUE(is_blob_index);
}
}
@ -196,11 +223,14 @@ TEST_F(DBBlobIndexTest, Get) {
// if blob index is updated with a normal value. See the test case above for
// more details.
TEST_F(DBBlobIndexTest, Updated) {
std::string blob_index;
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210, "blob");
for (auto tier : kAllTiers) {
DestroyAndReopen(GetTestOptions());
WriteBatch batch;
for (int i = 0; i < 10; i++) {
ASSERT_OK(PutBlobIndex(&batch, "key" + ToString(i), "blob_index"));
ASSERT_OK(PutBlobIndex(&batch, "key" + ToString(i), blob_index));
}
ASSERT_OK(Write(&batch));
// Avoid blob values from being purged.
@ -218,7 +248,7 @@ TEST_F(DBBlobIndexTest, Updated) {
ASSERT_OK(dbfull()->DeleteRange(WriteOptions(), cfh(), "key6", "key9"));
MoveDataTo(tier);
for (int i = 0; i < 10; i++) {
ASSERT_EQ("blob_index", GetBlobIndex("key" + ToString(i), snapshot));
ASSERT_EQ(blob_index, GetBlobIndex("key" + ToString(i), snapshot));
}
ASSERT_EQ("new_value", Get("key1"));
if (tier <= kImmutableMemtables) {
@ -232,7 +262,7 @@ TEST_F(DBBlobIndexTest, Updated) {
for (int i = 6; i < 9; i++) {
ASSERT_EQ("NOT_FOUND", Get("key" + ToString(i)));
}
ASSERT_EQ("blob_index", GetBlobIndex("key9"));
ASSERT_EQ(blob_index, GetBlobIndex("key9"));
dbfull()->ReleaseSnapshot(snapshot);
}
}

View File

@ -62,9 +62,9 @@ Status BuildTable(
FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,
std::vector<SequenceNumber> snapshots,
SequenceNumber earliest_write_conflict_snapshot,
SnapshotChecker* snapshot_checker, bool paranoid_file_checks,
InternalStats* internal_stats, IOStatus* io_status,
const std::shared_ptr<IOTracer>& io_tracer,
SequenceNumber job_snapshot, SnapshotChecker* snapshot_checker,
bool paranoid_file_checks, InternalStats* internal_stats,
IOStatus* io_status, const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCreationReason blob_creation_reason, EventLogger* event_logger,
int job_id, const Env::IOPriority io_priority,
TableProperties* table_properties, Env::WriteLifeTimeHint write_hint,
@ -189,12 +189,13 @@ Status BuildTable(
CompactionIterator c_iter(
iter, tboptions.internal_comparator.user_comparator(), &merge,
kMaxSequenceNumber, &snapshots, earliest_write_conflict_snapshot,
snapshot_checker, env, ShouldReportDetailedTime(env, ioptions.stats),
job_snapshot, snapshot_checker, env,
ShouldReportDetailedTime(env, ioptions.stats),
true /* internal key corruption is not ok */, range_del_agg.get(),
blob_file_builder.get(), ioptions.allow_data_in_errors,
/*compaction=*/nullptr, compaction_filter.get(),
/*shutting_down=*/nullptr,
/*preserve_deletes_seqnum=*/0, /*manual_compaction_paused=*/nullptr,
/*manual_compaction_paused=*/nullptr,
/*manual_compaction_canceled=*/nullptr, db_options.info_log,
full_history_ts_low);
@ -211,7 +212,11 @@ Status BuildTable(
break;
}
builder->Add(key, value);
meta->UpdateBoundaries(key, value, ikey.sequence, ikey.type);
s = meta->UpdateBoundaries(key, value, ikey.sequence, ikey.type);
if (!s.ok()) {
break;
}
// TODO(noetzli): Update stats after flush, too.
if (io_priority == Env::IO_HIGH &&

View File

@ -57,9 +57,9 @@ extern Status BuildTable(
FileMetaData* meta, std::vector<BlobFileAddition>* blob_file_additions,
std::vector<SequenceNumber> snapshots,
SequenceNumber earliest_write_conflict_snapshot,
SnapshotChecker* snapshot_checker, bool paranoid_file_checks,
InternalStats* internal_stats, IOStatus* io_status,
const std::shared_ptr<IOTracer>& io_tracer,
SequenceNumber job_snapshot, SnapshotChecker* snapshot_checker,
bool paranoid_file_checks, InternalStats* internal_stats,
IOStatus* io_status, const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCreationReason blob_creation_reason,
EventLogger* event_logger = nullptr, int job_id = 0,
const Env::IOPriority io_priority = Env::IO_HIGH,

View File

@ -4193,6 +4193,14 @@ rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity) {
return c;
}
rocksdb_cache_t* rocksdb_cache_create_lru_with_strict_capacity_limit(
size_t capacity) {
rocksdb_cache_t* c = new rocksdb_cache_t;
c->rep = NewLRUCache(capacity);
c->rep->SetStrictCapacityLimit(true);
return c;
}
rocksdb_cache_t* rocksdb_cache_create_lru_opts(
rocksdb_lru_cache_options_t* opt) {
rocksdb_cache_t* c = new rocksdb_cache_t;

View File

@ -9,10 +9,10 @@
#pragma once
#include <unordered_map>
#include <string>
#include <vector>
#include <atomic>
#include <string>
#include <unordered_map>
#include <vector>
#include "db/memtable_list.h"
#include "db/table_cache.h"
@ -25,6 +25,7 @@
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "trace_replay/block_cache_tracer.h"
#include "util/hash_containers.h"
#include "util/thread_local.h"
namespace ROCKSDB_NAMESPACE {
@ -705,8 +706,8 @@ class ColumnFamilySet {
// * when reading, at least one condition needs to be satisfied:
// 1. DB mutex locked
// 2. accessed from a single-threaded write thread
std::unordered_map<std::string, uint32_t> column_families_;
std::unordered_map<uint32_t, ColumnFamilyData*> column_family_data_;
UnorderedMap<std::string, uint32_t> column_families_;
UnorderedMap<uint32_t, ColumnFamilyData*> column_family_data_;
uint32_t max_column_family_;
const FileOptions file_options_;

View File

@ -24,40 +24,37 @@ CompactionIterator::CompactionIterator(
InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots,
SequenceNumber earliest_write_conflict_snapshot,
const SnapshotChecker* snapshot_checker, Env* env,
bool report_detailed_time, bool expect_valid_internal_key,
SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
Env* env, bool report_detailed_time, bool expect_valid_internal_key,
CompactionRangeDelAggregator* range_del_agg,
BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
const Compaction* compaction, const CompactionFilter* compaction_filter,
const std::atomic<bool>* shutting_down,
const SequenceNumber preserve_deletes_seqnum,
const std::atomic<int>* manual_compaction_paused,
const std::atomic<bool>* manual_compaction_canceled,
const std::shared_ptr<Logger> info_log,
const std::string* full_history_ts_low)
: CompactionIterator(
input, cmp, merge_helper, last_sequence, snapshots,
earliest_write_conflict_snapshot, snapshot_checker, env,
earliest_write_conflict_snapshot, job_snapshot, snapshot_checker, env,
report_detailed_time, expect_valid_internal_key, range_del_agg,
blob_file_builder, allow_data_in_errors,
std::unique_ptr<CompactionProxy>(
compaction ? new RealCompaction(compaction) : nullptr),
compaction_filter, shutting_down, preserve_deletes_seqnum,
manual_compaction_paused, manual_compaction_canceled, info_log,
full_history_ts_low) {}
compaction_filter, shutting_down, manual_compaction_paused,
manual_compaction_canceled, info_log, full_history_ts_low) {}
CompactionIterator::CompactionIterator(
InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
SequenceNumber /*last_sequence*/, std::vector<SequenceNumber>* snapshots,
SequenceNumber earliest_write_conflict_snapshot,
const SnapshotChecker* snapshot_checker, Env* env,
bool report_detailed_time, bool expect_valid_internal_key,
SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
Env* env, bool report_detailed_time, bool expect_valid_internal_key,
CompactionRangeDelAggregator* range_del_agg,
BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
std::unique_ptr<CompactionProxy> compaction,
const CompactionFilter* compaction_filter,
const std::atomic<bool>* shutting_down,
const SequenceNumber preserve_deletes_seqnum,
const std::atomic<int>* manual_compaction_paused,
const std::atomic<bool>* manual_compaction_canceled,
const std::shared_ptr<Logger> info_log,
@ -68,6 +65,7 @@ CompactionIterator::CompactionIterator(
merge_helper_(merge_helper),
snapshots_(snapshots),
earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
job_snapshot_(job_snapshot),
snapshot_checker_(snapshot_checker),
env_(env),
clock_(env_->GetSystemClock().get()),
@ -80,7 +78,6 @@ CompactionIterator::CompactionIterator(
shutting_down_(shutting_down),
manual_compaction_paused_(manual_compaction_paused),
manual_compaction_canceled_(manual_compaction_canceled),
preserve_deletes_seqnum_(preserve_deletes_seqnum),
info_log_(info_log),
allow_data_in_errors_(allow_data_in_errors),
timestamp_size_(cmp_ ? cmp_->timestamp_size() : 0),
@ -237,6 +234,10 @@ bool CompactionIterator::InvokeFilterIfNeeded(bool* need_skip,
return false;
}
TEST_SYNC_POINT_CALLBACK(
"CompactionIterator::InvokeFilterIfNeeded::TamperWithBlobIndex",
&value_);
// For integrated BlobDB impl, CompactionIterator reads blob value.
// For Stacked BlobDB impl, the corresponding CompactionFilter's
// FilterV2 method should read the blob value.
@ -758,7 +759,6 @@ void CompactionIterator::NextFromInput() {
(ikey_.type == kTypeDeletionWithTimestamp &&
cmp_with_history_ts_low_ < 0)) &&
DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) &&
ikeyNotNeededForIncrementalSnapshot() &&
compaction_->KeyNotExistsBeyondOutputLevel(ikey_.user_key,
&level_ptrs_)) {
// TODO(noetzli): This is the only place where we use compaction_
@ -792,7 +792,7 @@ void CompactionIterator::NextFromInput() {
} else if ((ikey_.type == kTypeDeletion ||
(ikey_.type == kTypeDeletionWithTimestamp &&
cmp_with_history_ts_low_ < 0)) &&
bottommost_level_ && ikeyNotNeededForIncrementalSnapshot()) {
bottommost_level_) {
// Handle the case where we have a delete key at the bottom most level
// We can skip outputting the key iff there are no subsequent puts for this
// key
@ -954,6 +954,10 @@ void CompactionIterator::GarbageCollectBlobIfNeeded() {
// GC for integrated BlobDB
if (compaction_->enable_blob_garbage_collection()) {
TEST_SYNC_POINT_CALLBACK(
"CompactionIterator::GarbageCollectBlobIfNeeded::TamperWithBlobIndex",
&value_);
BlobIndex blob_index;
{
@ -1060,10 +1064,9 @@ void CompactionIterator::PrepareOutput() {
// Can we do the same for levels above bottom level as long as
// KeyNotExistsBeyondOutputLevel() return true?
if (valid_ && compaction_ != nullptr &&
!compaction_->allow_ingest_behind() &&
ikeyNotNeededForIncrementalSnapshot() && bottommost_level_ &&
!compaction_->allow_ingest_behind() && bottommost_level_ &&
DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) &&
ikey_.type != kTypeMerge) {
ikey_.type != kTypeMerge && current_key_committed_) {
assert(ikey_.type != kTypeDeletion);
assert(ikey_.type != kTypeSingleDeletion ||
(timestamp_size_ || full_history_ts_low_));
@ -1139,13 +1142,6 @@ inline SequenceNumber CompactionIterator::findEarliestVisibleSnapshot(
return kMaxSequenceNumber;
}
// used in 2 places - prevents deletion markers to be dropped if they may be
// needed and disables seqnum zero-out in PrepareOutput for recent keys.
inline bool CompactionIterator::ikeyNotNeededForIncrementalSnapshot() {
return (!compaction_->preserve_deletes()) ||
(ikey_.sequence < preserve_deletes_seqnum_);
}
uint64_t CompactionIterator::ComputeBlobGarbageCollectionCutoffFileNumber(
const CompactionProxy* compaction) {
if (!compaction) {

View File

@ -92,8 +92,6 @@ class CompactionIterator {
virtual bool allow_ingest_behind() const = 0;
virtual bool preserve_deletes() const = 0;
virtual bool allow_mmap_reads() const = 0;
virtual bool enable_blob_garbage_collection() const = 0;
@ -139,8 +137,6 @@ class CompactionIterator {
return compaction_->immutable_options()->allow_ingest_behind;
}
bool preserve_deletes() const override { return false; }
bool allow_mmap_reads() const override {
return compaction_->immutable_options()->allow_mmap_reads;
}
@ -176,14 +172,13 @@ class CompactionIterator {
InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots,
SequenceNumber earliest_write_conflict_snapshot,
const SnapshotChecker* snapshot_checker, Env* env,
bool report_detailed_time, bool expect_valid_internal_key,
SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
Env* env, bool report_detailed_time, bool expect_valid_internal_key,
CompactionRangeDelAggregator* range_del_agg,
BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
const Compaction* compaction = nullptr,
const CompactionFilter* compaction_filter = nullptr,
const std::atomic<bool>* shutting_down = nullptr,
const SequenceNumber preserve_deletes_seqnum = 0,
const std::atomic<int>* manual_compaction_paused = nullptr,
const std::atomic<bool>* manual_compaction_canceled = nullptr,
const std::shared_ptr<Logger> info_log = nullptr,
@ -194,14 +189,13 @@ class CompactionIterator {
InternalIterator* input, const Comparator* cmp, MergeHelper* merge_helper,
SequenceNumber last_sequence, std::vector<SequenceNumber>* snapshots,
SequenceNumber earliest_write_conflict_snapshot,
const SnapshotChecker* snapshot_checker, Env* env,
bool report_detailed_time, bool expect_valid_internal_key,
SequenceNumber job_snapshot, const SnapshotChecker* snapshot_checker,
Env* env, bool report_detailed_time, bool expect_valid_internal_key,
CompactionRangeDelAggregator* range_del_agg,
BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
std::unique_ptr<CompactionProxy> compaction,
const CompactionFilter* compaction_filter = nullptr,
const std::atomic<bool>* shutting_down = nullptr,
const SequenceNumber preserve_deletes_seqnum = 0,
const std::atomic<int>* manual_compaction_paused = nullptr,
const std::atomic<bool>* manual_compaction_canceled = nullptr,
const std::shared_ptr<Logger> info_log = nullptr,
@ -272,14 +266,9 @@ class CompactionIterator {
inline SequenceNumber findEarliestVisibleSnapshot(
SequenceNumber in, SequenceNumber* prev_snapshot);
// Checks whether the currently seen ikey_ is needed for
// incremental (differential) snapshot and hence can't be dropped
// or seqnum be zero-ed out even if all other conditions for it are met.
inline bool ikeyNotNeededForIncrementalSnapshot();
inline bool KeyCommitted(SequenceNumber sequence) {
return snapshot_checker_ == nullptr ||
snapshot_checker_->CheckInSnapshot(sequence, kMaxSequenceNumber) ==
snapshot_checker_->CheckInSnapshot(sequence, job_snapshot_) ==
SnapshotCheckerResult::kInSnapshot;
}
@ -320,6 +309,7 @@ class CompactionIterator {
std::unordered_set<SequenceNumber> released_snapshots_;
std::vector<SequenceNumber>::const_iterator earliest_snapshot_iter_;
const SequenceNumber earliest_write_conflict_snapshot_;
const SequenceNumber job_snapshot_;
const SnapshotChecker* const snapshot_checker_;
Env* env_;
SystemClock* clock_;
@ -332,7 +322,6 @@ class CompactionIterator {
const std::atomic<bool>* shutting_down_;
const std::atomic<int>* manual_compaction_paused_;
const std::atomic<bool>* manual_compaction_canceled_;
const SequenceNumber preserve_deletes_seqnum_;
bool bottommost_level_;
bool valid_ = false;
bool visible_at_tip_;

View File

@ -166,8 +166,6 @@ class FakeCompaction : public CompactionIterator::CompactionProxy {
bool allow_ingest_behind() const override { return is_allow_ingest_behind; }
bool preserve_deletes() const override { return false; }
bool allow_mmap_reads() const override { return false; }
bool enable_blob_garbage_collection() const override { return false; }
@ -277,11 +275,11 @@ class CompactionIteratorTest : public testing::TestWithParam<bool> {
iter_->SeekToFirst();
c_iter_.reset(new CompactionIterator(
iter_.get(), cmp_, merge_helper_.get(), last_sequence, &snapshots_,
earliest_write_conflict_snapshot, snapshot_checker_.get(),
Env::Default(), false /* report_detailed_time */, false,
range_del_agg_.get(), nullptr /* blob_file_builder */,
true /*allow_data_in_errors*/, std::move(compaction), filter,
&shutting_down_, /*preserve_deletes_seqnum=*/0,
earliest_write_conflict_snapshot, kMaxSequenceNumber,
snapshot_checker_.get(), Env::Default(),
false /* report_detailed_time */, false, range_del_agg_.get(),
nullptr /* blob_file_builder */, true /*allow_data_in_errors*/,
std::move(compaction), filter, &shutting_down_,
/*manual_compaction_paused=*/nullptr,
/*manual_compaction_canceled=*/nullptr, /*info_log=*/nullptr,
full_history_ts_low));

View File

@ -417,16 +417,17 @@ CompactionJob::CompactionJob(
int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
const MutableDBOptions& mutable_db_options, const FileOptions& file_options,
VersionSet* versions, const std::atomic<bool>* shutting_down,
const SequenceNumber preserve_deletes_seqnum, LogBuffer* log_buffer,
FSDirectory* db_directory, FSDirectory* output_directory,
FSDirectory* blob_output_directory, Statistics* stats,
InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler,
LogBuffer* log_buffer, FSDirectory* db_directory,
FSDirectory* output_directory, FSDirectory* blob_output_directory,
Statistics* stats, InstrumentedMutex* db_mutex,
ErrorHandler* db_error_handler,
std::vector<SequenceNumber> existing_snapshots,
SequenceNumber earliest_write_conflict_snapshot,
const SnapshotChecker* snapshot_checker, std::shared_ptr<Cache> table_cache,
EventLogger* event_logger, bool paranoid_file_checks, bool measure_io_stats,
const std::string& dbname, CompactionJobStats* compaction_job_stats,
Env::Priority thread_pri, const std::shared_ptr<IOTracer>& io_tracer,
const SnapshotChecker* snapshot_checker, JobContext* job_context,
std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
bool paranoid_file_checks, bool measure_io_stats, const std::string& dbname,
CompactionJobStats* compaction_job_stats, Env::Priority thread_pri,
const std::shared_ptr<IOTracer>& io_tracer,
const std::atomic<int>* manual_compaction_paused,
const std::atomic<bool>* manual_compaction_canceled,
const std::string& db_id, const std::string& db_session_id,
@ -456,7 +457,6 @@ CompactionJob::CompactionJob(
shutting_down_(shutting_down),
manual_compaction_paused_(manual_compaction_paused),
manual_compaction_canceled_(manual_compaction_canceled),
preserve_deletes_seqnum_(preserve_deletes_seqnum),
db_directory_(db_directory),
blob_output_directory_(blob_output_directory),
db_mutex_(db_mutex),
@ -464,6 +464,7 @@ CompactionJob::CompactionJob(
existing_snapshots_(std::move(existing_snapshots)),
earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
snapshot_checker_(snapshot_checker),
job_context_(job_context),
table_cache_(std::move(table_cache)),
event_logger_(event_logger),
paranoid_file_checks_(paranoid_file_checks),
@ -1252,7 +1253,7 @@ void CompactionJob::NotifyOnSubcompactionBegin(
if (shutting_down_->load(std::memory_order_acquire)) {
return;
}
if (c->is_manual_compaction() &&
if (c->is_manual_compaction() && manual_compaction_paused_ &&
manual_compaction_paused_->load(std::memory_order_acquire) > 0) {
return;
}
@ -1469,15 +1470,18 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
Status status;
const std::string* const full_history_ts_low =
full_history_ts_low_.empty() ? nullptr : &full_history_ts_low_;
const SequenceNumber job_snapshot_seq =
job_context_ ? job_context_->GetJobSnapshotSequence()
: kMaxSequenceNumber;
sub_compact->c_iter.reset(new CompactionIterator(
input, cfd->user_comparator(), &merge, versions_->LastSequence(),
&existing_snapshots_, earliest_write_conflict_snapshot_,
&existing_snapshots_, earliest_write_conflict_snapshot_, job_snapshot_seq,
snapshot_checker_, env_, ShouldReportDetailedTime(env_, stats_),
/*expect_valid_internal_key=*/true, &range_del_agg,
blob_file_builder.get(), db_options_.allow_data_in_errors,
sub_compact->compaction, compaction_filter, shutting_down_,
preserve_deletes_seqnum_, manual_compaction_paused_,
manual_compaction_canceled_, db_options_.info_log, full_history_ts_low));
manual_compaction_paused_, manual_compaction_canceled_,
db_options_.info_log, full_history_ts_low));
auto c_iter = sub_compact->c_iter.get();
c_iter->SeekToFirst();
if (c_iter->Valid() && sub_compact->compaction->output_level() != 0) {
@ -1529,11 +1533,15 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
break;
}
const ParsedInternalKey& ikey = c_iter->ikey();
status = sub_compact->current_output()->meta.UpdateBoundaries(
key, value, ikey.sequence, ikey.type);
if (!status.ok()) {
break;
}
sub_compact->current_output_file_size =
sub_compact->builder->EstimatedFileSize();
const ParsedInternalKey& ikey = c_iter->ikey();
sub_compact->current_output()->meta.UpdateBoundaries(
key, value, ikey.sequence, ikey.type);
sub_compact->num_output_records++;
// Close output file if it is big enough. Two possibilities determine it's
@ -2488,19 +2496,20 @@ CompactionServiceCompactionJob::CompactionServiceCompactionJob(
std::vector<SequenceNumber> existing_snapshots,
std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
const std::string& dbname, const std::shared_ptr<IOTracer>& io_tracer,
const std::atomic<bool>* manual_compaction_canceled,
const std::string& db_id, const std::string& db_session_id,
const std::string& output_path,
const CompactionServiceInput& compaction_service_input,
CompactionServiceResult* compaction_service_result)
: CompactionJob(
job_id, compaction, db_options, mutable_db_options, file_options,
versions, shutting_down, 0, log_buffer, nullptr, output_directory,
versions, shutting_down, log_buffer, nullptr, output_directory,
nullptr, stats, db_mutex, db_error_handler, existing_snapshots,
kMaxSequenceNumber, nullptr, table_cache, event_logger,
kMaxSequenceNumber, nullptr, nullptr, table_cache, event_logger,
compaction->mutable_cf_options()->paranoid_file_checks,
compaction->mutable_cf_options()->report_bg_io_stats, dbname,
&(compaction_service_result->stats), Env::Priority::USER, io_tracer,
nullptr, nullptr, db_id, db_session_id,
nullptr, manual_compaction_canceled, db_id, db_session_id,
compaction->column_family_data()->GetFullHistoryTsLow()),
output_path_(output_path),
compaction_input_(compaction_service_input),

View File

@ -67,14 +67,13 @@ class CompactionJob {
int job_id, Compaction* compaction, const ImmutableDBOptions& db_options,
const MutableDBOptions& mutable_db_options,
const FileOptions& file_options, VersionSet* versions,
const std::atomic<bool>* shutting_down,
const SequenceNumber preserve_deletes_seqnum, LogBuffer* log_buffer,
const std::atomic<bool>* shutting_down, LogBuffer* log_buffer,
FSDirectory* db_directory, FSDirectory* output_directory,
FSDirectory* blob_output_directory, Statistics* stats,
InstrumentedMutex* db_mutex, ErrorHandler* db_error_handler,
std::vector<SequenceNumber> existing_snapshots,
SequenceNumber earliest_write_conflict_snapshot,
const SnapshotChecker* snapshot_checker,
const SnapshotChecker* snapshot_checker, JobContext* job_context,
std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
bool paranoid_file_checks, bool measure_io_stats,
const std::string& dbname, CompactionJobStats* compaction_job_stats,
@ -196,7 +195,6 @@ class CompactionJob {
const std::atomic<bool>* shutting_down_;
const std::atomic<int>* manual_compaction_paused_;
const std::atomic<bool>* manual_compaction_canceled_;
const SequenceNumber preserve_deletes_seqnum_;
FSDirectory* db_directory_;
FSDirectory* blob_output_directory_;
InstrumentedMutex* db_mutex_;
@ -214,6 +212,8 @@ class CompactionJob {
const SnapshotChecker* const snapshot_checker_;
JobContext* job_context_;
std::shared_ptr<Cache> table_cache_;
EventLogger* event_logger_;
@ -345,6 +345,7 @@ class CompactionServiceCompactionJob : private CompactionJob {
std::vector<SequenceNumber> existing_snapshots,
std::shared_ptr<Cache> table_cache, EventLogger* event_logger,
const std::string& dbname, const std::shared_ptr<IOTracer>& io_tracer,
const std::atomic<bool>* manual_compaction_canceled,
const std::string& db_id, const std::string& db_session_id,
const std::string& output_path,
const CompactionServiceInput& compaction_service_input,

View File

@ -87,7 +87,6 @@ class CompactionJobTestBase : public testing::Test {
/*block_cache_tracer=*/nullptr,
/*io_tracer=*/nullptr, /*db_session_id*/ "")),
shutting_down_(false),
preserve_deletes_seqnum_(0),
mock_table_factory_(new mock::MockTableFactory()),
error_handler_(nullptr, db_options_, &mutex_),
encode_u64_ts_(std::move(encode_u64_ts)) {
@ -354,11 +353,11 @@ class CompactionJobTestBase : public testing::Test {
ucmp_->timestamp_size() == full_history_ts_low_.size());
CompactionJob compaction_job(
0, &compaction, db_options_, mutable_db_options_, env_options_,
versions_.get(), &shutting_down_, preserve_deletes_seqnum_, &log_buffer,
nullptr, nullptr, nullptr, nullptr, &mutex_, &error_handler_, snapshots,
earliest_write_conflict_snapshot, snapshot_checker, table_cache_,
&event_logger, false, false, dbname_, &compaction_job_stats_,
Env::Priority::USER, nullptr /* IOTracer */,
versions_.get(), &shutting_down_, &log_buffer, nullptr, nullptr,
nullptr, nullptr, &mutex_, &error_handler_, snapshots,
earliest_write_conflict_snapshot, snapshot_checker, nullptr,
table_cache_, &event_logger, false, false, dbname_,
&compaction_job_stats_, Env::Priority::USER, nullptr /* IOTracer */,
/*manual_compaction_paused=*/nullptr,
/*manual_compaction_canceled=*/nullptr, /*db_id=*/"",
/*db_session_id=*/"", full_history_ts_low_);
@ -409,7 +408,6 @@ class CompactionJobTestBase : public testing::Test {
std::unique_ptr<VersionSet> versions_;
InstrumentedMutex mutex_;
std::atomic<bool> shutting_down_;
SequenceNumber preserve_deletes_seqnum_;
std::shared_ptr<mock::MockTableFactory> mock_table_factory_;
CompactionJobStats compaction_job_stats_;
ColumnFamilyData* cfd_;

View File

@ -12,13 +12,16 @@ namespace ROCKSDB_NAMESPACE {
class MyTestCompactionService : public CompactionService {
public:
MyTestCompactionService(std::string db_path, Options& options,
std::shared_ptr<Statistics>& statistics)
MyTestCompactionService(
std::string db_path, Options& options,
std::shared_ptr<Statistics>& statistics,
std::vector<std::shared_ptr<EventListener>>& listeners)
: db_path_(std::move(db_path)),
options_(options),
statistics_(statistics),
start_info_("na", "na", "na", 0, Env::TOTAL),
wait_info_("na", "na", "na", 0, Env::TOTAL) {}
wait_info_("na", "na", "na", 0, Env::TOTAL),
listeners_(listeners) {}
static const char* kClassName() { return "MyTestCompactionService"; }
@ -71,9 +74,16 @@ class MyTestCompactionService : public CompactionService {
options_override.table_factory = options_.table_factory;
options_override.sst_partitioner_factory = options_.sst_partitioner_factory;
options_override.statistics = statistics_;
if (!listeners_.empty()) {
options_override.listeners = listeners_;
}
OpenAndCompactOptions options;
options.canceled = &canceled_;
Status s = DB::OpenAndCompact(
db_path_, db_path_ + "/" + ROCKSDB_NAMESPACE::ToString(info.job_id),
options, db_path_,
db_path_ + "/" + ROCKSDB_NAMESPACE::ToString(info.job_id),
compaction_input, compaction_service_result, options_override);
if (is_override_wait_result_) {
*compaction_service_result = override_wait_result_;
@ -112,6 +122,8 @@ class MyTestCompactionService : public CompactionService {
is_override_wait_status_ = false;
}
void SetCanceled(bool canceled) { canceled_ = canceled; }
private:
InstrumentedMutex mutex_;
std::atomic_int compaction_num_{0};
@ -129,6 +141,8 @@ class MyTestCompactionService : public CompactionService {
CompactionServiceJobStatus::kFailure;
bool is_override_wait_result_ = false;
std::string override_wait_result_;
std::vector<std::shared_ptr<EventListener>> listeners_;
std::atomic_bool canceled_{false};
};
class CompactionServiceTest : public DBTestBase {
@ -144,7 +158,7 @@ class CompactionServiceTest : public DBTestBase {
compactor_statistics_ = CreateDBStatistics();
compaction_service_ = std::make_shared<MyTestCompactionService>(
dbname_, *options, compactor_statistics_);
dbname_, *options, compactor_statistics_, remote_listeners);
options->compaction_service = compaction_service_;
DestroyAndReopen(*options);
}
@ -192,6 +206,8 @@ class CompactionServiceTest : public DBTestBase {
}
}
std::vector<std::shared_ptr<EventListener>> remote_listeners;
private:
std::shared_ptr<Statistics> compactor_statistics_;
std::shared_ptr<Statistics> primary_statistics_;
@ -322,6 +338,51 @@ TEST_F(CompactionServiceTest, ManualCompaction) {
VerifyTestData();
}
TEST_F(CompactionServiceTest, CancelCompactionOnRemoteSide) {
Options options = CurrentOptions();
options.disable_auto_compactions = true;
ReopenWithCompactionService(&options);
GenerateTestData();
auto my_cs = GetCompactionService();
std::string start_str = Key(15);
std::string end_str = Key(45);
Slice start(start_str);
Slice end(end_str);
uint64_t comp_num = my_cs->GetCompactionNum();
// Test cancel compaction at the beginning
my_cs->SetCanceled(true);
auto s = db_->CompactRange(CompactRangeOptions(), &start, &end);
ASSERT_TRUE(s.IsIncomplete());
// compaction number is not increased
ASSERT_GE(my_cs->GetCompactionNum(), comp_num);
VerifyTestData();
// Test cancel compaction in progress
ReopenWithCompactionService(&options);
GenerateTestData();
my_cs = GetCompactionService();
my_cs->SetCanceled(false);
std::atomic_bool cancel_issued{false};
SyncPoint::GetInstance()->SetCallBack("CompactionJob::Run():Inprogress",
[&](void* /*arg*/) {
cancel_issued = true;
my_cs->SetCanceled(true);
});
SyncPoint::GetInstance()->EnableProcessing();
s = db_->CompactRange(CompactRangeOptions(), &start, &end);
ASSERT_TRUE(s.IsIncomplete());
ASSERT_TRUE(cancel_issued);
// compaction number is not increased
ASSERT_GE(my_cs->GetCompactionNum(), comp_num);
VerifyTestData();
}
TEST_F(CompactionServiceTest, FailedToStart) {
Options options = CurrentOptions();
options.disable_auto_compactions = true;
@ -685,6 +746,88 @@ TEST_F(CompactionServiceTest, FallbackLocalManual) {
VerifyTestData();
}
TEST_F(CompactionServiceTest, RemoteEventListener) {
class RemoteEventListenerTest : public EventListener {
public:
const char* Name() const override { return "RemoteEventListenerTest"; }
void OnSubcompactionBegin(const SubcompactionJobInfo& info) override {
auto result = on_going_compactions.emplace(info.job_id);
ASSERT_TRUE(result.second); // make sure there's no duplication
compaction_num++;
EventListener::OnSubcompactionBegin(info);
}
void OnSubcompactionCompleted(const SubcompactionJobInfo& info) override {
auto num = on_going_compactions.erase(info.job_id);
ASSERT_TRUE(num == 1); // make sure the compaction id exists
EventListener::OnSubcompactionCompleted(info);
}
void OnTableFileCreated(const TableFileCreationInfo& info) override {
ASSERT_EQ(on_going_compactions.count(info.job_id), 1);
file_created++;
EventListener::OnTableFileCreated(info);
}
void OnTableFileCreationStarted(
const TableFileCreationBriefInfo& info) override {
ASSERT_EQ(on_going_compactions.count(info.job_id), 1);
file_creation_started++;
EventListener::OnTableFileCreationStarted(info);
}
bool ShouldBeNotifiedOnFileIO() override {
file_io_notified++;
return EventListener::ShouldBeNotifiedOnFileIO();
}
std::atomic_uint64_t file_io_notified{0};
std::atomic_uint64_t file_creation_started{0};
std::atomic_uint64_t file_created{0};
std::set<int> on_going_compactions; // store the job_id
std::atomic_uint64_t compaction_num{0};
};
auto listener = new RemoteEventListenerTest();
remote_listeners.emplace_back(listener);
Options options = CurrentOptions();
ReopenWithCompactionService(&options);
for (int i = 0; i < 20; i++) {
for (int j = 0; j < 10; j++) {
int key_id = i * 10 + j;
ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
}
ASSERT_OK(Flush());
}
for (int i = 0; i < 10; i++) {
for (int j = 0; j < 10; j++) {
int key_id = i * 20 + j * 2;
ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id)));
}
ASSERT_OK(Flush());
}
ASSERT_OK(dbfull()->TEST_WaitForCompact());
// check the events are triggered
ASSERT_TRUE(listener->file_io_notified > 0);
ASSERT_TRUE(listener->file_creation_started > 0);
ASSERT_TRUE(listener->file_created > 0);
ASSERT_TRUE(listener->compaction_num > 0);
ASSERT_TRUE(listener->on_going_compactions.empty());
// verify result
for (int i = 0; i < 200; i++) {
auto result = Get(Key(i));
if (i % 2) {
ASSERT_EQ(result, "value" + ToString(i));
} else {
ASSERT_EQ(result, "value_new" + ToString(i));
}
}
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {

View File

@ -26,6 +26,7 @@
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/table.h"
#include "rocksdb/utilities/transaction_db.h"
#include "rocksdb/write_batch.h"
#include "table/block_based/block_based_table_builder.h"
#include "table/meta_blocks.h"
@ -275,6 +276,42 @@ class CorruptionTest : public testing::Test {
}
return Slice(*storage);
}
void GetSortedWalFiles(std::vector<uint64_t>& file_nums) {
std::vector<std::string> tmp_files;
ASSERT_OK(env_->GetChildren(dbname_, &tmp_files));
FileType type = kWalFile;
for (const auto& file : tmp_files) {
uint64_t number = 0;
if (ParseFileName(file, &number, &type) && type == kWalFile) {
file_nums.push_back(number);
}
}
std::sort(file_nums.begin(), file_nums.end());
}
void CorruptFileWithTruncation(FileType file, uint64_t number,
uint64_t bytes_to_truncate = 0) {
std::string path;
switch (file) {
case FileType::kWalFile:
path = LogFileName(dbname_, number);
break;
// TODO: Add other file types as this method is being used for those file
// types.
default:
return;
}
uint64_t old_size = 0;
ASSERT_OK(env_->GetFileSize(path, &old_size));
assert(old_size > bytes_to_truncate);
uint64_t new_size = old_size - bytes_to_truncate;
// If bytes_to_truncate == 0, it will do full truncation.
if (bytes_to_truncate == 0) {
new_size = old_size;
}
ASSERT_OK(test::TruncateFile(env_, path, new_size));
}
};
TEST_F(CorruptionTest, Recovery) {
@ -912,6 +949,313 @@ TEST_F(CorruptionTest, VerifyWholeTableChecksum) {
ASSERT_EQ(1, count);
}
class CrashDuringRecoveryWithCorruptionTest
: public CorruptionTest,
public testing::WithParamInterface<std::tuple<bool, bool>> {
public:
explicit CrashDuringRecoveryWithCorruptionTest()
: CorruptionTest(),
avoid_flush_during_recovery_(std::get<0>(GetParam())),
track_and_verify_wals_in_manifest_(std::get<1>(GetParam())) {}
protected:
const bool avoid_flush_during_recovery_;
const bool track_and_verify_wals_in_manifest_;
};
INSTANTIATE_TEST_CASE_P(CorruptionTest, CrashDuringRecoveryWithCorruptionTest,
::testing::Values(std::make_tuple(true, false),
std::make_tuple(false, false),
std::make_tuple(true, true),
std::make_tuple(false, true)));
// In case of non-TransactionDB with avoid_flush_during_recovery = true, RocksDB
// won't flush the data from WAL to L0 for all column families if possible. As a
// result, not all column families can increase their log_numbers, and
// min_log_number_to_keep won't change.
// It may prematurely persist a new MANIFEST even before we can declare the DB
// is in consistent state after recovery (this is when the new WAL is synced)
// and advances log_numbers for some column families.
//
// If there is power failure before we sync the new WAL, we will end up in
// a situation in which after persisting the MANIFEST, RocksDB will see some
// column families' log_numbers larger than the corrupted wal, and
// "Column family inconsistency: SST file contains data beyond the point of
// corruption" error will be hit, causing recovery to fail.
//
// After adding the fix, corrupted WALs whose numbers are larger than the
// corrupted wal and smaller than the new WAL are moved to a separate folder.
// Only after new WAL is synced, RocksDB persist a new MANIFEST with column
// families to ensure RocksDB is in consistent state.
// RocksDB writes an empty WriteBatch as a sentinel to the new WAL which is
// synced immediately afterwards. The sequence number of the sentinel
// WriteBatch will be the next sequence number immediately after the largest
// sequence number recovered from previous WALs and MANIFEST because of which DB
// will be in consistent state.
TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecovery) {
CloseDb();
Options options;
options.track_and_verify_wals_in_manifest =
track_and_verify_wals_in_manifest_;
options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
options.avoid_flush_during_recovery = false;
options.env = env_;
ASSERT_OK(DestroyDB(dbname_, options));
options.create_if_missing = true;
options.max_write_buffer_number = 3;
Reopen(&options);
Status s;
const std::string test_cf_name = "test_cf";
ColumnFamilyHandle* cfh = nullptr;
s = db_->CreateColumnFamily(options, test_cf_name, &cfh);
ASSERT_OK(s);
delete cfh;
CloseDb();
std::vector<ColumnFamilyDescriptor> cf_descs;
cf_descs.emplace_back(kDefaultColumnFamilyName, options);
cf_descs.emplace_back(test_cf_name, options);
std::vector<ColumnFamilyHandle*> handles;
// 1. Open and populate the DB. Write and flush default_cf several times to
// advance wal number so that some column families have advanced log_number
// while other don't.
{
ASSERT_OK(DB::Open(options, dbname_, cf_descs, &handles, &db_));
auto* dbimpl = static_cast_with_check<DBImpl>(db_);
assert(dbimpl);
// Write one key to test_cf.
ASSERT_OK(db_->Put(WriteOptions(), handles[1], "old_key", "dontcare"));
// Write to default_cf and flush this cf several times to advance wal
// number.
for (int i = 0; i < 2; ++i) {
ASSERT_OK(db_->Put(WriteOptions(), "key" + std::to_string(i), "value"));
ASSERT_OK(dbimpl->TEST_SwitchMemtable());
}
ASSERT_OK(db_->Put(WriteOptions(), handles[1], "dontcare", "dontcare"));
for (auto* h : handles) {
delete h;
}
handles.clear();
CloseDb();
}
// 2. Corrupt second last wal file to emulate power reset which caused the DB
// to lose the un-synced WAL.
{
std::vector<uint64_t> file_nums;
GetSortedWalFiles(file_nums);
size_t size = file_nums.size();
uint64_t log_num = file_nums[size - 2];
CorruptFileWithTruncation(FileType::kWalFile, log_num,
/*bytes_to_truncate=*/8);
}
// 3. After first crash reopen the DB which contains corrupted WAL. Default
// family has higher log number than corrupted wal number.
//
// Case1: If avoid_flush_during_recovery = true, RocksDB won't flush the data
// from WAL to L0 for all column families (test_cf_name in this case). As a
// result, not all column families can increase their log_numbers, and
// min_log_number_to_keep won't change.
//
// Case2: If avoid_flush_during_recovery = false, all column families have
// flushed their data from WAL to L0 during recovery, and none of them will
// ever need to read the WALs again.
{
options.avoid_flush_during_recovery = avoid_flush_during_recovery_;
s = DB::Open(options, dbname_, cf_descs, &handles, &db_);
ASSERT_OK(s);
for (auto* h : handles) {
delete h;
}
handles.clear();
CloseDb();
}
// 4. Corrupt max_wal_num to emulate second power reset which caused the
// DB to again lose the un-synced WAL.
{
std::vector<uint64_t> file_nums;
GetSortedWalFiles(file_nums);
size_t size = file_nums.size();
uint64_t log_num = file_nums[size - 1];
CorruptFileWithTruncation(FileType::kWalFile, log_num);
}
// 5. After second crash reopen the db with second corruption. Default family
// has higher log number than corrupted wal number.
//
// Case1: If avoid_flush_during_recovery = true, we persist a new
// MANIFEST with advanced log_numbers for some column families only after
// syncing the WAL. So during second crash, RocksDB will skip the corrupted
// WAL files as they have been moved to different folder. Since newly synced
// WAL file's sequence number (sentinel WriteBatch) will be the next
// sequence number immediately after the largest sequence number recovered
// from previous WALs and MANIFEST, db will be in consistent state and opens
// successfully.
//
// Case2: If avoid_flush_during_recovery = false, the corrupted WAL is below
// this number. So during a second crash after persisting the new MANIFEST,
// RocksDB will skip the corrupted WAL(s) because they are all below this
// bound. Therefore, we won't hit the "column family inconsistency" error
// message.
{
options.avoid_flush_during_recovery = avoid_flush_during_recovery_;
ASSERT_OK(DB::Open(options, dbname_, cf_descs, &handles, &db_));
for (auto* h : handles) {
delete h;
}
handles.clear();
CloseDb();
}
}
// In case of TransactionDB, it enables two-phase-commit. The prepare section of
// an uncommitted transaction always need to be kept. Even if we perform flush
// during recovery, we may still need to hold an old WAL. The
// min_log_number_to_keep won't change, and "Column family inconsistency: SST
// file contains data beyond the point of corruption" error will be hit, causing
// recovery to fail.
//
// After adding the fix, corrupted WALs whose numbers are larger than the
// corrupted wal and smaller than the new WAL are moved to a separate folder.
// Only after new WAL is synced, RocksDB persist a new MANIFEST with column
// families to ensure RocksDB is in consistent state.
// RocksDB writes an empty WriteBatch as a sentinel to the new WAL which is
// synced immediately afterwards. The sequence number of the sentinel
// WriteBatch will be the next sequence number immediately after the largest
// sequence number recovered from previous WALs and MANIFEST because of which DB
// will be in consistent state.
TEST_P(CrashDuringRecoveryWithCorruptionTest, TxnDbCrashDuringRecovery) {
CloseDb();
Options options;
options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
options.track_and_verify_wals_in_manifest =
track_and_verify_wals_in_manifest_;
options.avoid_flush_during_recovery = false;
options.env = env_;
ASSERT_OK(DestroyDB(dbname_, options));
options.create_if_missing = true;
options.max_write_buffer_number = 3;
Reopen(&options);
// Create cf test_cf_name.
ColumnFamilyHandle* cfh = nullptr;
const std::string test_cf_name = "test_cf";
Status s = db_->CreateColumnFamily(options, test_cf_name, &cfh);
ASSERT_OK(s);
delete cfh;
CloseDb();
std::vector<ColumnFamilyDescriptor> cf_descs;
cf_descs.emplace_back(kDefaultColumnFamilyName, options);
cf_descs.emplace_back(test_cf_name, options);
std::vector<ColumnFamilyHandle*> handles;
TransactionDB* txn_db = nullptr;
TransactionDBOptions txn_db_opts;
// 1. Open and populate the DB. Write and flush default_cf several times to
// advance wal number so that some column families have advanced log_number
// while other don't.
{
options.avoid_flush_during_recovery = avoid_flush_during_recovery_;
ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, cf_descs,
&handles, &txn_db));
auto* txn = txn_db->BeginTransaction(WriteOptions(), TransactionOptions());
// Put cf1
ASSERT_OK(txn->Put(handles[1], "foo", "value"));
ASSERT_OK(txn->SetName("txn0"));
ASSERT_OK(txn->Prepare());
delete txn;
txn = nullptr;
auto* dbimpl = static_cast_with_check<DBImpl>(txn_db->GetRootDB());
assert(dbimpl);
// Put and flush cf0
for (int i = 0; i < 2; ++i) {
ASSERT_OK(txn_db->Put(WriteOptions(), "dontcare", "value"));
ASSERT_OK(dbimpl->TEST_SwitchMemtable());
}
// Put cf1
txn = txn_db->BeginTransaction(WriteOptions(), TransactionOptions());
ASSERT_OK(txn->Put(handles[1], "foo1", "value"));
ASSERT_OK(txn->Commit());
delete txn;
txn = nullptr;
for (auto* h : handles) {
delete h;
}
handles.clear();
delete txn_db;
}
// 2. Corrupt second last wal to emulate power reset which caused the DB to
// lose the un-synced WAL.
{
std::vector<uint64_t> file_nums;
GetSortedWalFiles(file_nums);
size_t size = file_nums.size();
uint64_t log_num = file_nums[size - 2];
CorruptFileWithTruncation(FileType::kWalFile, log_num,
/*bytes_to_truncate=*/8);
}
// 3. After first crash reopen the DB which contains corrupted WAL. Default
// family has higher log number than corrupted wal number. There may be old
// WAL files that it must not delete because they can contain data of
// uncommitted transactions. As a result, min_log_number_to_keep won't change.
{
options.avoid_flush_during_recovery = avoid_flush_during_recovery_;
ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, cf_descs,
&handles, &txn_db));
for (auto* h : handles) {
delete h;
}
handles.clear();
delete txn_db;
}
// 4. Corrupt max_wal_num to emulate second power reset which caused the
// DB to again lose the un-synced WAL.
{
std::vector<uint64_t> file_nums;
GetSortedWalFiles(file_nums);
size_t size = file_nums.size();
uint64_t log_num = file_nums[size - 1];
CorruptFileWithTruncation(FileType::kWalFile, log_num);
}
// 5. After second crash reopen the db with second corruption. Default family
// has higher log number than corrupted wal number.
// We persist a new MANIFEST with advanced log_numbers for some column
// families only after syncing the WAL. So during second crash, RocksDB will
// skip the corrupted WAL files as they have been moved to different folder.
// Since newly synced WAL file's sequence number (sentinel WriteBatch) will be
// the next sequence number immediately after the largest sequence number
// recovered from previous WALs and MANIFEST, db will be in consistent state
// and opens successfully.
{
options.avoid_flush_during_recovery = false;
ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, cf_descs,
&handles, &txn_db));
for (auto* h : handles) {
delete h;
}
delete txn_db;
}
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {

View File

@ -175,7 +175,10 @@ TEST_F(DBBasicTest, ReadOnlyDB) {
ASSERT_TRUE(db_->SyncWAL().IsNotSupported());
}
TEST_F(DBBasicTest, ReadOnlyDBWithWriteDBIdToManifestSet) {
// TODO akanksha: Update the test to check that combination
// does not actually write to FS (use open read-only with
// CompositeEnvWrapper+ReadOnlyFileSystem).
TEST_F(DBBasicTest, DISABLED_ReadOnlyDBWithWriteDBIdToManifestSet) {
ASSERT_OK(Put("foo", "v1"));
ASSERT_OK(Put("bar", "v2"));
ASSERT_OK(Put("foo", "v3"));

View File

@ -1404,21 +1404,11 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
ASSERT_TRUE(
db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));
EXPECT_EQ(
ToString(expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]),
values["count.index-block"]);
EXPECT_EQ(
ToString(expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]),
values["count.data-block"]);
EXPECT_EQ(
ToString(expected[static_cast<size_t>(CacheEntryRole::kFilterBlock)]),
values["count.filter-block"]);
EXPECT_EQ(
ToString(
prev_expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]),
values["count.write-buffer"]);
EXPECT_EQ(ToString(expected[static_cast<size_t>(CacheEntryRole::kMisc)]),
values["count.misc"]);
for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
auto role = static_cast<CacheEntryRole>(i);
EXPECT_EQ(ToString(expected[i]),
values[BlockCacheEntryStatsMapKeys::EntryCount(role)]);
}
// Add one for kWriteBuffer
{
@ -1431,7 +1421,8 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
env_->MockSleepForSeconds(1);
EXPECT_EQ(ToString(prev_expected[static_cast<size_t>(
CacheEntryRole::kWriteBuffer)]),
values["count.write-buffer"]);
values[BlockCacheEntryStatsMapKeys::EntryCount(
CacheEntryRole::kWriteBuffer)]);
// Not enough for a "background" miss but enough for a "foreground" miss
env_->MockSleepForSeconds(45);
@ -1440,7 +1431,8 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
EXPECT_EQ(
ToString(
expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]),
values["count.write-buffer"]);
values[BlockCacheEntryStatsMapKeys::EntryCount(
CacheEntryRole::kWriteBuffer)]);
}
prev_expected = expected;

View File

@ -1074,7 +1074,8 @@ class DBFilterConstructionReserveMemoryTestWithParam
};
INSTANTIATE_TEST_CASE_P(
BlockBasedTableOptions, DBFilterConstructionReserveMemoryTestWithParam,
DBFilterConstructionReserveMemoryTestWithParam,
DBFilterConstructionReserveMemoryTestWithParam,
::testing::Values(std::make_tuple(false, kFastLocalBloom, false, false),
std::make_tuple(true, kFastLocalBloom, false, false),
@ -1090,7 +1091,7 @@ INSTANTIATE_TEST_CASE_P(
std::make_tuple(true, kDeprecatedBlock, false, false),
std::make_tuple(true, kLegacyBloom, false, false)));
// TODO: Speed up this test.
// TODO: Speed up this test, and reduce disk space usage (~700MB)
// The current test inserts many keys (on the scale of dummy entry size)
// in order to make small memory user (e.g, final filter, partitioned hash
// entries/filter/banding) , which is proportional to the number of

View File

@ -2409,6 +2409,30 @@ TEST_P(DBCompactionTestWithParam, LevelCompactionCFPathUse) {
check_getvalues();
{ // Also verify GetLiveFilesStorageInfo with db_paths / cf_paths
std::vector<LiveFileStorageInfo> new_infos;
LiveFilesStorageInfoOptions lfsio;
lfsio.wal_size_for_flush = UINT64_MAX; // no flush
ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsio, &new_infos));
std::unordered_map<std::string, int> live_sst_by_dir;
for (auto& info : new_infos) {
if (info.file_type == kTableFile) {
live_sst_by_dir[info.directory]++;
// Verify file on disk (no directory confusion)
uint64_t size;
ASSERT_OK(env_->GetFileSize(
info.directory + "/" + info.relative_filename, &size));
ASSERT_EQ(info.size, size);
}
}
ASSERT_EQ(3U * 3U, live_sst_by_dir.size());
for (auto& paths : {options.db_paths, cf_opt1.cf_paths, cf_opt2.cf_paths}) {
ASSERT_EQ(1, live_sst_by_dir[paths[0].path]);
ASSERT_EQ(4, live_sst_by_dir[paths[1].path]);
ASSERT_EQ(2, live_sst_by_dir[paths[2].path]);
}
}
ReopenWithColumnFamilies({"default", "one", "two"}, option_vector);
check_getvalues();
@ -6484,20 +6508,29 @@ TEST_F(DBCompactionTest, CompactionWithBlobGCError_CorruptIndex) {
ASSERT_OK(Put(third_key, third_value));
constexpr char fourth_key[] = "fourth_key";
constexpr char corrupt_blob_index[] = "foobar";
WriteBatch batch;
ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, fourth_key,
corrupt_blob_index));
ASSERT_OK(db_->Write(WriteOptions(), &batch));
constexpr char fourth_value[] = "fourth_value";
ASSERT_OK(Put(fourth_key, fourth_value));
ASSERT_OK(Flush());
SyncPoint::GetInstance()->SetCallBack(
"CompactionIterator::GarbageCollectBlobIfNeeded::TamperWithBlobIndex",
[](void* arg) {
Slice* const blob_index = static_cast<Slice*>(arg);
assert(blob_index);
assert(!blob_index->empty());
blob_index->remove_prefix(1);
});
SyncPoint::GetInstance()->EnableProcessing();
constexpr Slice* begin = nullptr;
constexpr Slice* end = nullptr;
ASSERT_TRUE(
db_->CompactRange(CompactRangeOptions(), begin, end).IsCorruption());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
TEST_F(DBCompactionTest, CompactionWithBlobGCError_InlinedTTLIndex) {

View File

@ -101,6 +101,7 @@
#include "util/compression.h"
#include "util/crc32c.h"
#include "util/defer.h"
#include "util/hash_containers.h"
#include "util/mutexlock.h"
#include "util/stop_watch.h"
#include "util/string_util.h"
@ -2000,7 +2001,7 @@ std::vector<Status> DBImpl::MultiGet(
SequenceNumber consistent_seqnum;
std::unordered_map<uint32_t, MultiGetColumnFamilyData> multiget_cf_data(
UnorderedMap<uint32_t, MultiGetColumnFamilyData> multiget_cf_data(
column_family.size());
for (auto cf : column_family) {
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(cf);
@ -2012,13 +2013,13 @@ std::vector<Status> DBImpl::MultiGet(
}
std::function<MultiGetColumnFamilyData*(
std::unordered_map<uint32_t, MultiGetColumnFamilyData>::iterator&)>
UnorderedMap<uint32_t, MultiGetColumnFamilyData>::iterator&)>
iter_deref_lambda =
[](std::unordered_map<uint32_t, MultiGetColumnFamilyData>::iterator&
[](UnorderedMap<uint32_t, MultiGetColumnFamilyData>::iterator&
cf_iter) { return &cf_iter->second; };
bool unref_only =
MultiCFSnapshot<std::unordered_map<uint32_t, MultiGetColumnFamilyData>>(
MultiCFSnapshot<UnorderedMap<uint32_t, MultiGetColumnFamilyData>>(
read_options, nullptr, iter_deref_lambda, &multiget_cf_data,
&consistent_seqnum);
@ -3681,6 +3682,11 @@ Status DBImpl::GetUpdatesSince(
SequenceNumber seq, std::unique_ptr<TransactionLogIterator>* iter,
const TransactionLogIterator::ReadOptions& read_options) {
RecordTick(stats_, GET_UPDATES_SINCE_CALLS);
if (seq_per_batch_) {
return Status::NotSupported(
"This API is not yet compatible with write-prepared/write-unprepared "
"transactions");
}
if (seq > versions_->LastSequence()) {
return Status::NotFound("Requested sequence not yet written in the db");
}

View File

@ -1240,6 +1240,43 @@ class DBImpl : public DB {
std::atomic<bool> shutting_down_;
// RecoveryContext struct stores the context about version edits along
// with corresponding column_family_data and column_family_options.
class RecoveryContext {
public:
~RecoveryContext() {
for (auto& edit_list : edit_lists_) {
for (auto* edit : edit_list) {
delete edit;
}
edit_list.clear();
}
cfds_.clear();
mutable_cf_opts_.clear();
edit_lists_.clear();
files_to_delete_.clear();
}
void UpdateVersionEdits(ColumnFamilyData* cfd, const VersionEdit& edit) {
if (map_.find(cfd->GetID()) == map_.end()) {
uint32_t size = static_cast<uint32_t>(map_.size());
map_.emplace(cfd->GetID(), size);
cfds_.emplace_back(cfd);
mutable_cf_opts_.emplace_back(cfd->GetLatestMutableCFOptions());
edit_lists_.emplace_back(autovector<VersionEdit*>());
}
uint32_t i = map_[cfd->GetID()];
edit_lists_[i].emplace_back(new VersionEdit(edit));
}
std::unordered_map<uint32_t, uint32_t> map_; // cf_id to index;
autovector<ColumnFamilyData*> cfds_;
autovector<const MutableCFOptions*> mutable_cf_opts_;
autovector<autovector<VersionEdit*>> edit_lists_;
// files_to_delete_ contains sst files
std::set<std::string> files_to_delete_;
};
// Except in DB::Open(), WriteOptionsFile can only be called when:
// Persist options to options file.
// If need_mutex_lock = false, the method will lock DB mutex.
@ -1367,16 +1404,19 @@ class DBImpl : public DB {
// be made to the descriptor are added to *edit.
// recovered_seq is set to less than kMaxSequenceNumber if the log's tail is
// skipped.
// recovery_ctx stores the context about version edits and all those
// edits are persisted to new Manifest after successfully syncing the new WAL.
virtual Status Recover(
const std::vector<ColumnFamilyDescriptor>& column_families,
bool read_only = false, bool error_if_wal_file_exists = false,
bool error_if_data_exists_in_wals = false,
uint64_t* recovered_seq = nullptr);
uint64_t* recovered_seq = nullptr,
RecoveryContext* recovery_ctx = nullptr);
virtual bool OwnTablesAndLogs() const { return true; }
// Set DB identity file, and write DB ID to manifest if necessary.
Status SetDBId(bool read_only);
Status SetDBId(bool read_only, RecoveryContext* recovery_ctx);
// REQUIRES: db mutex held when calling this function, but the db mutex can
// be released and re-acquired. Db mutex will be held when the function
@ -1385,12 +1425,15 @@ class DBImpl : public DB {
// not referenced in the MANIFEST (e.g.
// 1. It's best effort recovery;
// 2. The VersionEdits referencing the SST files are appended to
// MANIFEST, DB crashes when syncing the MANIFEST, the VersionEdits are
// RecoveryContext, DB crashes when syncing the MANIFEST, the VersionEdits are
// still not synced to MANIFEST during recovery.)
// We delete these SST files. In the
// It stores the SST files to be deleted in RecoveryContext. In the
// meantime, we find out the largest file number present in the paths, and
// bump up the version set's next_file_number_ to be 1 + largest_file_number.
Status DeleteUnreferencedSstFiles();
// recovery_ctx stores the context about version edits and files to be
// deleted. All those edits are persisted to new Manifest after successfully
// syncing the new WAL.
Status DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx);
// SetDbSessionId() should be called in the constuctor DBImpl()
// to ensure that db_session_id_ gets updated every time the DB is opened
@ -1400,6 +1443,11 @@ class DBImpl : public DB {
Status FailIfTsSizesMismatch(const ColumnFamilyHandle* column_family,
const Slice& ts) const;
// recovery_ctx stores the context about version edits and
// LogAndApplyForRecovery persist all those edits to new Manifest after
// successfully syncing new WAL.
Status LogAndApplyForRecovery(const RecoveryContext& recovery_ctx);
private:
friend class DB;
friend class ErrorHandler;
@ -1654,9 +1702,10 @@ class DBImpl : public DB {
// REQUIRES: log_numbers are sorted in ascending order
// corrupted_log_found is set to true if we recover from a corrupted log file.
Status RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
Status RecoverLogFiles(std::vector<uint64_t>& log_numbers,
SequenceNumber* next_sequence, bool read_only,
bool* corrupted_log_found);
bool* corrupted_log_found,
RecoveryContext* recovery_ctx);
// The following two methods are used to flush a memtable to
// storage. The first one is used at database RecoveryTime (when the
@ -1666,6 +1715,12 @@ class DBImpl : public DB {
Status WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
MemTable* mem, VersionEdit* edit);
// Move all the WAL files starting from corrupted WAL found to
// max_wal_number to avoid column family inconsistency error on recovery. It
// also removes the deleted file from the vector wal_numbers.
void MoveCorruptedWalFiles(std::vector<uint64_t>& wal_numbers,
uint64_t corrupted_wal_number);
// Get the size of a log file and, if truncate is true, truncate the
// log file to its actual size, thereby freeing preallocated space.
// Return success even if truncate fails
@ -2380,11 +2435,6 @@ class DBImpl : public DB {
// DB::Open() or passed to us
bool own_sfm_;
// Default value is 0 which means ALL deletes are
// preserved. Note that this has no effect if preserve_deletes is false.
const std::atomic<SequenceNumber> preserve_deletes_seqnum_{0};
const bool preserve_deletes_ = false;
// Flag to check whether Close() has been called on this DB
bool closed_;
// save the closing status, for re-calling the close()

View File

@ -1363,11 +1363,11 @@ Status DBImpl::CompactFilesImpl(
CompactionJob compaction_job(
job_context->job_id, c.get(), immutable_db_options_, mutable_db_options_,
file_options_for_compaction_, versions_.get(), &shutting_down_,
preserve_deletes_seqnum_.load(), log_buffer, directories_.GetDbDir(),
log_buffer, directories_.GetDbDir(),
GetDataDir(c->column_family_data(), c->output_path_id()),
GetDataDir(c->column_family_data(), 0), stats_, &mutex_, &error_handler_,
snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
table_cache_, &event_logger_,
job_context, table_cache_, &event_logger_,
c->mutable_cf_options()->paranoid_file_checks,
c->mutable_cf_options()->report_bg_io_stats, dbname_,
&compaction_job_stats, Env::Priority::USER, io_tracer_,
@ -3357,12 +3357,11 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
CompactionJob compaction_job(
job_context->job_id, c.get(), immutable_db_options_,
mutable_db_options_, file_options_for_compaction_, versions_.get(),
&shutting_down_, preserve_deletes_seqnum_.load(), log_buffer,
directories_.GetDbDir(),
&shutting_down_, log_buffer, directories_.GetDbDir(),
GetDataDir(c->column_family_data(), c->output_path_id()),
GetDataDir(c->column_family_data(), 0), stats_, &mutex_,
&error_handler_, snapshot_seqs, earliest_write_conflict_snapshot,
snapshot_checker, table_cache_, &event_logger_,
snapshot_checker, job_context, table_cache_, &event_logger_,
c->mutable_cf_options()->paranoid_file_checks,
c->mutable_cf_options()->report_bg_io_stats, dbname_,
&compaction_job_stats, thread_pri, io_tracer_,

View File

@ -863,7 +863,7 @@ uint64_t PrecomputeMinLogNumberToKeep2PC(
return min_log_number_to_keep;
}
Status DBImpl::SetDBId(bool read_only) {
Status DBImpl::SetDBId(bool read_only, RecoveryContext* recovery_ctx) {
Status s;
// Happens when immutable_db_options_.write_dbid_to_manifest is set to true
// the very first time.
@ -890,14 +890,14 @@ Status DBImpl::SetDBId(bool read_only) {
}
s = GetDbIdentityFromIdentityFile(&db_id_);
if (immutable_db_options_.write_dbid_to_manifest && s.ok()) {
assert(!read_only);
assert(recovery_ctx != nullptr);
assert(versions_->GetColumnFamilySet() != nullptr);
VersionEdit edit;
edit.SetDBId(db_id_);
Options options;
MutableCFOptions mutable_cf_options(options);
versions_->db_id_ = db_id_;
s = versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(),
mutable_cf_options, &edit, &mutex_, nullptr,
/* new_descriptor_log */ false);
recovery_ctx->UpdateVersionEdits(
versions_->GetColumnFamilySet()->GetDefault(), edit);
}
} else if (!read_only) {
s = SetIdentityFile(env_, dbname_, db_id_);
@ -905,7 +905,7 @@ Status DBImpl::SetDBId(bool read_only) {
return s;
}
Status DBImpl::DeleteUnreferencedSstFiles() {
Status DBImpl::DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx) {
mutex_.AssertHeld();
std::vector<std::string> paths;
paths.push_back(NormalizePath(dbname_ + std::string(1, kFilePathSeparator)));
@ -925,7 +925,6 @@ Status DBImpl::DeleteUnreferencedSstFiles() {
uint64_t next_file_number = versions_->current_next_file_number();
uint64_t largest_file_number = next_file_number;
std::set<std::string> files_to_delete;
Status s;
for (const auto& path : paths) {
std::vector<std::string> files;
@ -943,8 +942,9 @@ Status DBImpl::DeleteUnreferencedSstFiles() {
const std::string normalized_fpath = path + fname;
largest_file_number = std::max(largest_file_number, number);
if (type == kTableFile && number >= next_file_number &&
files_to_delete.find(normalized_fpath) == files_to_delete.end()) {
files_to_delete.insert(normalized_fpath);
recovery_ctx->files_to_delete_.find(normalized_fpath) ==
recovery_ctx->files_to_delete_.end()) {
recovery_ctx->files_to_delete_.insert(normalized_fpath);
}
}
}
@ -961,21 +961,7 @@ Status DBImpl::DeleteUnreferencedSstFiles() {
assert(versions_->GetColumnFamilySet());
ColumnFamilyData* default_cfd = versions_->GetColumnFamilySet()->GetDefault();
assert(default_cfd);
s = versions_->LogAndApply(
default_cfd, *default_cfd->GetLatestMutableCFOptions(), &edit, &mutex_,
directories_.GetDbDir(), /*new_descriptor_log*/ false);
if (!s.ok()) {
return s;
}
mutex_.Unlock();
for (const auto& fname : files_to_delete) {
s = env_->DeleteFile(fname);
if (!s.ok()) {
break;
}
}
mutex_.Lock();
recovery_ctx->UpdateVersionEdits(default_cfd, edit);
return s;
}

View File

@ -399,7 +399,7 @@ IOStatus Directories::SetDirectories(FileSystem* fs, const std::string& dbname,
Status DBImpl::Recover(
const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
bool error_if_wal_file_exists, bool error_if_data_exists_in_wals,
uint64_t* recovered_seq) {
uint64_t* recovered_seq, RecoveryContext* recovery_ctx) {
mutex_.AssertHeld();
bool is_new_db = false;
@ -518,9 +518,10 @@ Status DBImpl::Recover(
if (!s.ok()) {
return s;
}
s = SetDBId(read_only);
s = SetDBId(read_only, recovery_ctx);
if (s.ok() && !read_only) {
s = DeleteUnreferencedSstFiles();
s = DeleteUnreferencedSstFiles(recovery_ctx);
}
if (immutable_db_options_.paranoid_checks && s.ok()) {
@ -535,10 +536,6 @@ Status DBImpl::Recover(
}
}
}
// DB mutex is already held
if (s.ok() && immutable_db_options_.persist_stats_to_disk) {
s = InitPersistStatsColumnFamily();
}
std::vector<std::string> files_in_wal_dir;
if (s.ok()) {
@ -608,7 +605,10 @@ Status DBImpl::Recover(
WalNumber max_wal_number =
versions_->GetWalSet().GetWals().rbegin()->first;
edit.DeleteWalsBefore(max_wal_number + 1);
s = versions_->LogAndApplyToDefaultColumnFamily(&edit, &mutex_);
assert(recovery_ctx != nullptr);
assert(versions_->GetColumnFamilySet() != nullptr);
recovery_ctx->UpdateVersionEdits(
versions_->GetColumnFamilySet()->GetDefault(), edit);
}
if (!s.ok()) {
return s;
@ -644,8 +644,8 @@ Status DBImpl::Recover(
std::sort(wals.begin(), wals.end());
bool corrupted_wal_found = false;
s = RecoverLogFiles(wals, &next_sequence, read_only,
&corrupted_wal_found);
s = RecoverLogFiles(wals, &next_sequence, read_only, &corrupted_wal_found,
recovery_ctx);
if (corrupted_wal_found && recovered_seq != nullptr) {
*recovered_seq = next_sequence;
}
@ -805,10 +805,30 @@ Status DBImpl::InitPersistStatsColumnFamily() {
return s;
}
Status DBImpl::LogAndApplyForRecovery(const RecoveryContext& recovery_ctx) {
mutex_.AssertHeld();
assert(versions_->descriptor_log_ == nullptr);
Status s = versions_->LogAndApply(
recovery_ctx.cfds_, recovery_ctx.mutable_cf_opts_,
recovery_ctx.edit_lists_, &mutex_, directories_.GetDbDir());
if (s.ok() && !(recovery_ctx.files_to_delete_.empty())) {
mutex_.Unlock();
for (const auto& fname : recovery_ctx.files_to_delete_) {
s = env_->DeleteFile(fname);
if (!s.ok()) {
break;
}
}
mutex_.Lock();
}
return s;
}
// REQUIRES: wal_numbers are sorted in ascending order
Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
Status DBImpl::RecoverLogFiles(std::vector<uint64_t>& wal_numbers,
SequenceNumber* next_sequence, bool read_only,
bool* corrupted_wal_found) {
bool* corrupted_wal_found,
RecoveryContext* recovery_ctx) {
struct LogReporter : public log::Reader::Reporter {
Env* env;
Logger* info_log;
@ -833,6 +853,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
edit.SetColumnFamily(cfd->GetID());
version_edits.insert({cfd->GetID(), edit});
}
int job_id = next_job_id_.fetch_add(1);
{
auto stream = event_logger_.Log();
@ -1256,6 +1277,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
edit->SetLogNumber(max_wal_number + 1);
}
}
if (status.ok()) {
// we must mark the next log number as used, even though it's
// not actually used. that is because VersionSet assumes
@ -1263,42 +1285,40 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
// log number
versions_->MarkFileNumberUsed(max_wal_number + 1);
autovector<ColumnFamilyData*> cfds;
autovector<const MutableCFOptions*> cf_opts;
autovector<autovector<VersionEdit*>> edit_lists;
for (auto* cfd : *versions_->GetColumnFamilySet()) {
cfds.push_back(cfd);
cf_opts.push_back(cfd->GetLatestMutableCFOptions());
auto iter = version_edits.find(cfd->GetID());
assert(iter != version_edits.end());
edit_lists.push_back({&iter->second});
if (corrupted_wal_found != nullptr && *corrupted_wal_found == true &&
immutable_db_options_.wal_recovery_mode ==
WALRecoveryMode::kPointInTimeRecovery) {
MoveCorruptedWalFiles(wal_numbers, corrupted_wal_number);
}
assert(recovery_ctx != nullptr);
for (auto* cfd : *versions_->GetColumnFamilySet()) {
auto iter = version_edits.find(cfd->GetID());
assert(iter != version_edits.end());
recovery_ctx->UpdateVersionEdits(cfd, iter->second);
}
std::unique_ptr<VersionEdit> wal_deletion;
if (flushed) {
wal_deletion = std::make_unique<VersionEdit>();
VersionEdit wal_deletion;
if (immutable_db_options_.track_and_verify_wals_in_manifest) {
wal_deletion->DeleteWalsBefore(max_wal_number + 1);
wal_deletion.DeleteWalsBefore(max_wal_number + 1);
}
if (!allow_2pc()) {
// In non-2pc mode, flushing the memtables of the column families
// means we can advance min_log_number_to_keep.
wal_deletion->SetMinLogNumberToKeep(max_wal_number + 1);
wal_deletion.SetMinLogNumberToKeep(max_wal_number + 1);
}
edit_lists.back().push_back(wal_deletion.get());
assert(versions_->GetColumnFamilySet() != nullptr);
recovery_ctx->UpdateVersionEdits(
versions_->GetColumnFamilySet()->GetDefault(), wal_deletion);
}
// write MANIFEST with update
status = versions_->LogAndApply(cfds, cf_opts, edit_lists, &mutex_,
directories_.GetDbDir(),
/*new_descriptor_log=*/true);
}
}
if (status.ok()) {
if (data_seen && !flushed) {
status = RestoreAliveLogFiles(wal_numbers);
} else {
} else if (!wal_numbers.empty()) {
// If there's no data in the WAL, or we flushed all the data, still
// truncate the log file. If the process goes into a crash loop before
// the file is deleted, the preallocated space will never get freed.
@ -1314,6 +1334,48 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
return status;
}
void DBImpl::MoveCorruptedWalFiles(std::vector<uint64_t>& wal_numbers,
uint64_t corrupted_wal_number) {
size_t num_wals = wal_numbers.size();
// Find the first corrupted wal.
auto iter = std::lower_bound(wal_numbers.begin(), wal_numbers.end(),
corrupted_wal_number);
auto corrupt_start_iter = iter;
// Increment iter to move WAL files from first corrupted_wal_number + 1.
iter++;
std::string archival_path =
ArchivalDirectory(immutable_db_options_.GetWalDir());
Status create_status = env_->CreateDirIfMissing(archival_path);
// create_status is only checked when it needs to move the corrupted WAL files
// to archive folder.
create_status.PermitUncheckedError();
// Truncate the last WAL to reclaim the pre allocated space before
// moving it.
GetLogSizeAndMaybeTruncate(wal_numbers.back(), /*truncate=*/true, nullptr)
.PermitUncheckedError();
// Move all the WAL files from corrupted_wal_number + 1 to last WAL
// (max_wal_number) to avoid column family inconsistency error to archival
// directory. If its unable to create archive dir, it will delete the
// corrupted WAL files.
// We are moving all but first corrupted WAL file to a different folder.
while (iter != wal_numbers.end()) {
LogFileNumberSize log(*iter);
std::string fname = LogFileName(immutable_db_options_.GetWalDir(), *iter);
#ifndef ROCKSDB_LITE
if (create_status.ok()) {
wal_manager_.ArchiveWALFile(fname, *iter);
}
#endif
iter++;
}
wal_numbers.erase(corrupt_start_iter + 1, wal_numbers.begin() + num_wals);
}
Status DBImpl::GetLogSizeAndMaybeTruncate(uint64_t wal_number, bool truncate,
LogFileNumberSize* log_ptr) {
LogFileNumberSize log(wal_number);
@ -1376,7 +1438,8 @@ Status DBImpl::RestoreAliveLogFiles(const std::vector<uint64_t>& wal_numbers) {
// log has such preallocated space, so we only truncate for the last log.
LogFileNumberSize log;
s = GetLogSizeAndMaybeTruncate(
wal_number, /*truncate=*/(wal_number == wal_numbers.back()), &log);
wal_number,
/*truncate=*/(wal_number == wal_numbers.back()), &log);
if (!s.ok()) {
break;
}
@ -1465,9 +1528,9 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
dbname_, versions_.get(), immutable_db_options_, tboptions,
file_options_for_compaction_, cfd->table_cache(), iter.get(),
std::move(range_del_iters), &meta, &blob_file_additions,
snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
paranoid_file_checks, cfd->internal_stats(), &io_s, io_tracer_,
BlobFileCreationReason::kRecovery, &event_logger_, job_id,
snapshot_seqs, earliest_write_conflict_snapshot, kMaxSequenceNumber,
snapshot_checker, paranoid_file_checks, cfd->internal_stats(), &io_s,
io_tracer_, BlobFileCreationReason::kRecovery, &event_logger_, job_id,
Env::IO_HIGH, nullptr /* table_properties */, write_hint,
nullptr /*full_history_ts_low*/, &blob_callback_);
LogFlush(immutable_db_options_.info_log);
@ -1737,9 +1800,13 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath();
impl->mutex_.Lock();
RecoveryContext recovery_ctx;
// Handles create_if_missing, error_if_exists
uint64_t recovered_seq(kMaxSequenceNumber);
s = impl->Recover(column_families, false, false, false, &recovered_seq);
s = impl->Recover(column_families, false, false, false, &recovered_seq,
&recovery_ctx);
if (s.ok()) {
uint64_t new_log_number = impl->versions_->NewFileNumber();
log::Writer* new_log = nullptr;
@ -1755,6 +1822,55 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
impl->logs_.emplace_back(new_log_number, new_log);
}
if (s.ok()) {
if (impl->two_write_queues_) {
impl->log_write_mutex_.Lock();
}
impl->alive_log_files_.push_back(
DBImpl::LogFileNumberSize(impl->logfile_number_));
impl->alive_log_files_tail_ = impl->alive_log_files_.rbegin();
if (impl->two_write_queues_) {
impl->log_write_mutex_.Unlock();
}
}
if (s.ok()) {
// In WritePrepared there could be gap in sequence numbers. This breaks
// the trick we use in kPointInTimeRecovery which assumes the first seq
// in the log right after the corrupted log is one larger than the last
// seq we read from the wals. To let this trick keep working, we add a
// dummy entry with the expected sequence to the first log right after
// recovery. In non-WritePrepared case also the new log after recovery
// could be empty, and thus missing the consecutive seq hint to
// distinguish middle-log corruption to
// corrupted-log-remained-after-recovery. This case also will be
// addressed by a dummy write.
if (recovered_seq != kMaxSequenceNumber) {
WriteBatch empty_batch;
WriteBatchInternal::SetSequence(&empty_batch, recovered_seq);
WriteOptions write_options;
uint64_t log_used, log_size;
log::Writer* log_writer = impl->logs_.back().writer;
s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size,
Env::IO_TOTAL, /*with_db_mutex==*/true);
if (s.ok()) {
// Need to fsync, otherwise it might get lost after a power reset.
s = impl->FlushWAL(false);
if (s.ok()) {
s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync);
}
}
}
}
}
if (s.ok()) {
s = impl->LogAndApplyForRecovery(recovery_ctx);
}
if (s.ok() && impl->immutable_db_options_.persist_stats_to_disk) {
impl->mutex_.AssertHeld();
s = impl->InitPersistStatsColumnFamily();
}
if (s.ok()) {
// set column family handles
for (auto cf : column_families) {
@ -1783,6 +1899,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
}
}
}
if (s.ok()) {
SuperVersionContext sv_context(/* create_superversion */ true);
for (auto cfd : *impl->versions_->GetColumnFamilySet()) {
@ -1790,43 +1907,6 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
cfd, &sv_context, *cfd->GetLatestMutableCFOptions());
}
sv_context.Clean();
if (impl->two_write_queues_) {
impl->log_write_mutex_.Lock();
}
impl->alive_log_files_.push_back(
DBImpl::LogFileNumberSize(impl->logfile_number_));
impl->alive_log_files_tail_ = impl->alive_log_files_.rbegin();
if (impl->two_write_queues_) {
impl->log_write_mutex_.Unlock();
}
}
if (s.ok()) {
// In WritePrepared there could be gap in sequence numbers. This breaks
// the trick we use in kPointInTimeRecovery which assumes the first seq in
// the log right after the corrupted log is one larger than the last seq
// we read from the wals. To let this trick keep working, we add a dummy
// entry with the expected sequence to the first log right after recovery.
// In non-WritePrepared case also the new log after recovery could be
// empty, and thus missing the consecutive seq hint to distinguish
// middle-log corruption to corrupted-log-remained-after-recovery. This
// case also will be addressed by a dummy write.
if (recovered_seq != kMaxSequenceNumber) {
WriteBatch empty_batch;
WriteBatchInternal::SetSequence(&empty_batch, recovered_seq);
WriteOptions write_options;
uint64_t log_used, log_size;
log::Writer* log_writer = impl->logs_.back().writer;
s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size,
Env::IO_TOTAL, /*with_db_mutex==*/true);
if (s.ok()) {
// Need to fsync, otherwise it might get lost after a power reset.
s = impl->FlushWAL(false);
if (s.ok()) {
s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync);
}
}
}
}
}
if (s.ok() && impl->immutable_db_options_.persist_stats_to_disk) {
// try to read format version
@ -1853,7 +1933,8 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
if (cfd->ioptions()->merge_operator != nullptr &&
!cfd->mem()->IsMergeOperatorSupported()) {
s = Status::InvalidArgument(
"The memtable of column family %s does not support merge operator "
"The memtable of column family %s does not support merge "
"operator "
"its options.merge_operator is non-null",
cfd->GetName().c_str());
}

View File

@ -33,7 +33,8 @@ DBImplSecondary::~DBImplSecondary() {}
Status DBImplSecondary::Recover(
const std::vector<ColumnFamilyDescriptor>& column_families,
bool /*readonly*/, bool /*error_if_wal_file_exists*/,
bool /*error_if_data_exists_in_wals*/, uint64_t*) {
bool /*error_if_data_exists_in_wals*/, uint64_t*,
RecoveryContext* /*recovery_ctx*/) {
mutex_.AssertHeld();
JobContext job_context(0);
@ -731,8 +732,11 @@ Status DB::OpenAsSecondary(
}
Status DBImplSecondary::CompactWithoutInstallation(
ColumnFamilyHandle* cfh, const CompactionServiceInput& input,
CompactionServiceResult* result) {
const OpenAndCompactOptions& options, ColumnFamilyHandle* cfh,
const CompactionServiceInput& input, CompactionServiceResult* result) {
if (options.canceled && options.canceled->load(std::memory_order_acquire)) {
return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
}
InstrumentedMutexLock l(&mutex_);
auto cfd = static_cast_with_check<ColumnFamilyHandleImpl>(cfh)->cfd();
if (!cfd) {
@ -794,7 +798,7 @@ Status DBImplSecondary::CompactWithoutInstallation(
file_options_for_compaction_, versions_.get(), &shutting_down_,
&log_buffer, output_dir.get(), stats_, &mutex_, &error_handler_,
input.snapshots, table_cache_, &event_logger_, dbname_, io_tracer_,
db_id_, db_session_id_, secondary_path_, input, result);
options.canceled, db_id_, db_session_id_, secondary_path_, input, result);
mutex_.Unlock();
s = compaction_job.Run();
@ -813,9 +817,13 @@ Status DBImplSecondary::CompactWithoutInstallation(
}
Status DB::OpenAndCompact(
const std::string& name, const std::string& output_directory,
const std::string& input, std::string* result,
const OpenAndCompactOptions& options, const std::string& name,
const std::string& output_directory, const std::string& input,
std::string* output,
const CompactionServiceOptionsOverride& override_options) {
if (options.canceled && options.canceled->load(std::memory_order_acquire)) {
return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
}
CompactionServiceInput compaction_input;
Status s = CompactionServiceInput::Read(input, &compaction_input);
if (!s.ok()) {
@ -845,6 +853,7 @@ Status DB::OpenAndCompact(
override_options.table_factory;
compaction_input.column_family.options.sst_partitioner_factory =
override_options.sst_partitioner_factory;
compaction_input.db_options.listeners = override_options.listeners;
std::vector<ColumnFamilyDescriptor> column_families;
column_families.push_back(compaction_input.column_family);
@ -868,10 +877,10 @@ Status DB::OpenAndCompact(
CompactionServiceResult compaction_result;
DBImplSecondary* db_secondary = static_cast_with_check<DBImplSecondary>(db);
assert(handles.size() > 0);
s = db_secondary->CompactWithoutInstallation(handles[0], compaction_input,
&compaction_result);
s = db_secondary->CompactWithoutInstallation(
options, handles[0], compaction_input, &compaction_result);
Status serialization_status = compaction_result.Write(result);
Status serialization_status = compaction_result.Write(output);
for (auto& handle : handles) {
delete handle;
@ -883,6 +892,14 @@ Status DB::OpenAndCompact(
return s;
}
Status DB::OpenAndCompact(
const std::string& name, const std::string& output_directory,
const std::string& input, std::string* output,
const CompactionServiceOptionsOverride& override_options) {
return OpenAndCompact(OpenAndCompactOptions(), name, output_directory, input,
output, override_options);
}
#else // !ROCKSDB_LITE
Status DB::OpenAsSecondary(const Options& /*options*/,

View File

@ -81,8 +81,8 @@ class DBImplSecondary : public DBImpl {
// and log_readers_ to facilitate future operations.
Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families,
bool read_only, bool error_if_wal_file_exists,
bool error_if_data_exists_in_wals,
uint64_t* = nullptr) override;
bool error_if_data_exists_in_wals, uint64_t* = nullptr,
RecoveryContext* recovery_ctx = nullptr) override;
// Implementations of the DB interface
using DB::Get;
@ -234,10 +234,11 @@ class DBImplSecondary : public DBImpl {
Status CheckConsistency() override;
#ifndef NDEBUG
Status TEST_CompactWithoutInstallation(ColumnFamilyHandle* cfh,
Status TEST_CompactWithoutInstallation(const OpenAndCompactOptions& options,
ColumnFamilyHandle* cfh,
const CompactionServiceInput& input,
CompactionServiceResult* result) {
return CompactWithoutInstallation(cfh, input, result);
return CompactWithoutInstallation(options, cfh, input, result);
}
#endif // NDEBUG
@ -352,7 +353,8 @@ class DBImplSecondary : public DBImpl {
// Run compaction without installation, the output files will be placed in the
// secondary DB path. The LSM tree won't be changed, the secondary DB is still
// in read-only mode.
Status CompactWithoutInstallation(ColumnFamilyHandle* cfh,
Status CompactWithoutInstallation(const OpenAndCompactOptions& options,
ColumnFamilyHandle* cfh,
const CompactionServiceInput& input,
CompactionServiceResult* result);

View File

@ -78,7 +78,6 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
range_del_agg_(&ioptions.internal_comparator, s),
db_impl_(db_impl),
cfd_(cfd),
start_seqnum_(0ULL),
timestamp_ub_(read_options.timestamp),
timestamp_lb_(read_options.iter_start_ts),
timestamp_size_(timestamp_ub_ ? timestamp_ub_->size() : 0) {
@ -328,25 +327,7 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
case kTypeSingleDeletion:
// Arrange to skip all upcoming entries for this key since
// they are hidden by this deletion.
// if iterartor specified start_seqnum we
// 1) return internal key, including the type
// 2) return ikey only if ikey.seqnum >= start_seqnum_
// note that if deletion seqnum is < start_seqnum_ we
// just skip it like in normal iterator.
if (start_seqnum_ > 0) {
if (ikey_.sequence >= start_seqnum_) {
saved_key_.SetInternalKey(ikey_);
valid_ = true;
return true;
} else {
saved_key_.SetUserKey(
ikey_.user_key,
!pin_thru_lifetime_ ||
!iter_.iter()->IsKeyPinned() /* copy */);
skipping_saved_key = true;
PERF_COUNTER_ADD(internal_delete_skipped_count, 1);
}
} else if (timestamp_lb_) {
if (timestamp_lb_) {
saved_key_.SetInternalKey(ikey_);
valid_ = true;
return true;
@ -360,28 +341,7 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
break;
case kTypeValue:
case kTypeBlobIndex:
if (start_seqnum_ > 0) {
if (ikey_.sequence >= start_seqnum_) {
saved_key_.SetInternalKey(ikey_);
if (ikey_.type == kTypeBlobIndex) {
if (!SetBlobValueIfNeeded(ikey_.user_key, iter_.value())) {
return false;
}
}
valid_ = true;
return true;
} else {
// this key and all previous versions shouldn't be included,
// skipping_saved_key
saved_key_.SetUserKey(
ikey_.user_key,
!pin_thru_lifetime_ ||
!iter_.iter()->IsKeyPinned() /* copy */);
skipping_saved_key = true;
}
} else if (timestamp_lb_) {
if (timestamp_lb_) {
saved_key_.SetInternalKey(ikey_);
if (ikey_.type == kTypeBlobIndex) {

View File

@ -151,7 +151,7 @@ class DBIter final : public Iterator {
}
Slice key() const override {
assert(valid_);
if (start_seqnum_ > 0 || timestamp_lb_) {
if (timestamp_lb_) {
return saved_key_.GetInternalKey();
} else {
const Slice ukey_and_ts = saved_key_.GetUserKey();
@ -371,9 +371,6 @@ class DBIter final : public Iterator {
ROCKSDB_FIELD_UNUSED
#endif
ColumnFamilyData* cfd_;
// for diff snapshots we want the lower bound on the seqnum;
// if this value > 0 iterator will return internal keys
SequenceNumber start_seqnum_;
const Slice* const timestamp_ub_;
const Slice* const timestamp_lb_;
const size_t timestamp_size_;

View File

@ -3,6 +3,7 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_index.h"
#include "db/db_test_util.h"
#include "rocksdb/rocksdb_namespace.h"
@ -54,7 +55,7 @@ class DbKvChecksumTest
case WriteBatchOpType::kMerge:
s = wb.Merge(cf_handle, "key", "val");
break;
case WriteBatchOpType::kBlobIndex:
case WriteBatchOpType::kBlobIndex: {
// TODO(ajkr): use public API once available.
uint32_t cf_id;
if (cf_handle == nullptr) {
@ -62,8 +63,14 @@ class DbKvChecksumTest
} else {
cf_id = cf_handle->GetID();
}
s = WriteBatchInternal::PutBlobIndex(&wb, cf_id, "key", "val");
std::string blob_index;
BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210,
"val");
s = WriteBatchInternal::PutBlobIndex(&wb, cf_id, "key", blob_index);
break;
}
case WriteBatchOpType::kNum:
assert(false);
}

View File

@ -1997,6 +1997,37 @@ TEST_F(DBPropertiesTest, GetMapPropertyDbStats) {
Close();
}
TEST_F(DBPropertiesTest, GetMapPropertyBlockCacheEntryStats) {
// Currently only verifies the expected properties are present
std::map<std::string, std::string> values;
ASSERT_TRUE(
db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));
ASSERT_TRUE(values.find(BlockCacheEntryStatsMapKeys::CacheId()) !=
values.end());
ASSERT_TRUE(values.find(BlockCacheEntryStatsMapKeys::CacheCapacityBytes()) !=
values.end());
ASSERT_TRUE(
values.find(
BlockCacheEntryStatsMapKeys::LastCollectionDurationSeconds()) !=
values.end());
ASSERT_TRUE(
values.find(BlockCacheEntryStatsMapKeys::LastCollectionAgeSeconds()) !=
values.end());
for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
CacheEntryRole role = static_cast<CacheEntryRole>(i);
ASSERT_TRUE(values.find(BlockCacheEntryStatsMapKeys::EntryCount(role)) !=
values.end());
ASSERT_TRUE(values.find(BlockCacheEntryStatsMapKeys::UsedBytes(role)) !=
values.end());
ASSERT_TRUE(values.find(BlockCacheEntryStatsMapKeys::UsedPercent(role)) !=
values.end());
}
// There should be no extra values in the map.
ASSERT_EQ(3 * kNumCacheEntryRoles + 4, values.size());
}
namespace {
std::string PopMetaIndexKey(InternalIterator* meta_iter) {
Status s = meta_iter->status();

View File

@ -188,8 +188,8 @@ TEST_F(DBSecondaryTest, SimpleInternalCompaction) {
auto cfh = db_secondary_->DefaultColumnFamily();
CompactionServiceResult result;
ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(cfh, input,
&result));
ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(
OpenAndCompactOptions(), cfh, input, &result));
ASSERT_EQ(result.output_files.size(), 1);
InternalKey smallest, largest;
@ -248,8 +248,8 @@ TEST_F(DBSecondaryTest, InternalCompactionMultiLevels) {
OpenSecondary(options);
auto cfh = db_secondary_->DefaultColumnFamily();
CompactionServiceResult result;
ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(cfh, input1,
&result));
ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(
OpenAndCompactOptions(), cfh, input1, &result));
ASSERT_OK(result.status);
// pick 2 files on level 1 for compaction, which has 6 overlap files on L2
@ -261,8 +261,8 @@ TEST_F(DBSecondaryTest, InternalCompactionMultiLevels) {
}
input2.output_level = 2;
ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(cfh, input2,
&result));
ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(
OpenAndCompactOptions(), cfh, input2, &result));
ASSERT_OK(result.status);
CloseSecondary();
@ -273,15 +273,15 @@ TEST_F(DBSecondaryTest, InternalCompactionMultiLevels) {
}
OpenSecondary(options);
cfh = db_secondary_->DefaultColumnFamily();
Status s = db_secondary_full()->TEST_CompactWithoutInstallation(cfh, input2,
&result);
Status s = db_secondary_full()->TEST_CompactWithoutInstallation(
OpenAndCompactOptions(), cfh, input2, &result);
ASSERT_TRUE(s.IsInvalidArgument());
ASSERT_OK(result.status);
// TODO: L0 -> L1 compaction should success, currently version is not built
// if files is missing.
// ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(cfh,
// input1, &result));
// ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(OpenAndCompactOptions(),
// cfh, input1, &result));
}
TEST_F(DBSecondaryTest, InternalCompactionCompactedFiles) {
@ -319,8 +319,8 @@ TEST_F(DBSecondaryTest, InternalCompactionCompactedFiles) {
auto cfh = db_secondary_->DefaultColumnFamily();
CompactionServiceResult result;
Status s =
db_secondary_full()->TEST_CompactWithoutInstallation(cfh, input, &result);
Status s = db_secondary_full()->TEST_CompactWithoutInstallation(
OpenAndCompactOptions(), cfh, input, &result);
ASSERT_TRUE(s.IsInvalidArgument());
ASSERT_OK(result.status);
}
@ -356,15 +356,15 @@ TEST_F(DBSecondaryTest, InternalCompactionMissingFiles) {
auto cfh = db_secondary_->DefaultColumnFamily();
CompactionServiceResult result;
Status s =
db_secondary_full()->TEST_CompactWithoutInstallation(cfh, input, &result);
Status s = db_secondary_full()->TEST_CompactWithoutInstallation(
OpenAndCompactOptions(), cfh, input, &result);
ASSERT_TRUE(s.IsInvalidArgument());
ASSERT_OK(result.status);
input.input_files.erase(input.input_files.begin());
ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(cfh, input,
&result));
ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(
OpenAndCompactOptions(), cfh, input, &result));
ASSERT_OK(result.status);
}

View File

@ -2427,7 +2427,7 @@ TEST_F(DBTest, SnapshotFiles) {
// Also test GetLiveFilesStorageInfo
std::vector<LiveFileStorageInfo> new_infos;
ASSERT_OK(dbfull()->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(),
ASSERT_OK(db_->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(),
&new_infos));
// Close DB (while deletions disabled)

View File

@ -287,7 +287,6 @@ TEST_F(DBWALTest, Recover) {
ReopenWithColumnFamilies({"default", "pikachu"}, CurrentOptions());
ASSERT_EQ("v1", Get(1, "foo"));
ASSERT_EQ("v1", Get(1, "foo"));
ASSERT_EQ("v5", Get(1, "baz"));
ASSERT_OK(Put(1, "bar", "v2"));

View File

@ -1796,6 +1796,54 @@ TEST_F(ExternalSSTFileBasicTest, IngestWithTemperature) {
ASSERT_EQ(std::atoi(prop.c_str()), 0);
}
TEST_F(ExternalSSTFileBasicTest, FailIfNotBottommostLevel) {
Options options = GetDefaultOptions();
std::string file_path = sst_files_dir_ + ToString(1);
SstFileWriter sfw(EnvOptions(), options);
ASSERT_OK(sfw.Open(file_path));
ASSERT_OK(sfw.Put("b", "dontcare"));
ASSERT_OK(sfw.Finish());
// Test universal compaction + ingest with snapshot consistency
options.create_if_missing = true;
options.compaction_style = CompactionStyle::kCompactionStyleUniversal;
DestroyAndReopen(options);
{
const Snapshot* snapshot = db_->GetSnapshot();
ManagedSnapshot snapshot_guard(db_, snapshot);
IngestExternalFileOptions ifo;
ifo.fail_if_not_bottommost_level = true;
ifo.snapshot_consistency = true;
const Status s = db_->IngestExternalFile({file_path}, ifo);
ASSERT_TRUE(s.IsTryAgain());
}
// Test level compaction
options.compaction_style = CompactionStyle::kCompactionStyleLevel;
options.num_levels = 2;
DestroyAndReopen(options);
ASSERT_OK(db_->Put(WriteOptions(), "a", "dontcare"));
ASSERT_OK(db_->Put(WriteOptions(), "c", "dontcare"));
ASSERT_OK(db_->Flush(FlushOptions()));
ASSERT_OK(db_->Put(WriteOptions(), "b", "dontcare"));
ASSERT_OK(db_->Put(WriteOptions(), "d", "dontcare"));
ASSERT_OK(db_->Flush(FlushOptions()));
{
CompactRangeOptions cro;
cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
IngestExternalFileOptions ifo;
ifo.fail_if_not_bottommost_level = true;
const Status s = db_->IngestExternalFile({file_path}, ifo);
ASSERT_TRUE(s.IsTryAgain());
}
}
INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest,
testing::Values(std::make_tuple(true, true),
std::make_tuple(true, false),

View File

@ -739,6 +739,12 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
if (force_global_seqno) {
*assigned_seqno = last_seqno + 1;
if (compaction_style == kCompactionStyleUniversal || files_overlap_) {
if (ingestion_options_.fail_if_not_bottommost_level) {
status = Status::TryAgain(
"Files cannot be ingested to Lmax. Please make sure key range of "
"Lmax does not overlap with files to ingest.");
return status;
}
file_to_ingest->picked_level = 0;
return status;
}
@ -808,6 +814,15 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
target_level = 0;
*assigned_seqno = last_seqno + 1;
}
if (ingestion_options_.fail_if_not_bottommost_level &&
target_level < cfd_->NumberLevels() - 1) {
status = Status::TryAgain(
"Files cannot be ingested to Lmax. Please make sure key range of Lmax "
"does not overlap with files to ingest.");
return status;
}
TEST_SYNC_POINT_CALLBACK(
"ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile",
&overlap_with_db);

View File

@ -455,16 +455,18 @@ Status FlushJob::MemPurge() {
ioptions->logger, true /* internal key corruption is not ok */,
existing_snapshots_.empty() ? 0 : existing_snapshots_.back(),
snapshot_checker_);
assert(job_context_);
SequenceNumber job_snapshot_seq = job_context_->GetJobSnapshotSequence();
CompactionIterator c_iter(
iter.get(), (cfd_->internal_comparator()).user_comparator(), &merge,
kMaxSequenceNumber, &existing_snapshots_,
earliest_write_conflict_snapshot_, snapshot_checker_, env,
ShouldReportDetailedTime(env, ioptions->stats),
earliest_write_conflict_snapshot_, job_snapshot_seq, snapshot_checker_,
env, ShouldReportDetailedTime(env, ioptions->stats),
true /* internal key corruption is not ok */, range_del_agg.get(),
nullptr, ioptions->allow_data_in_errors,
/*compaction=*/nullptr, compaction_filter.get(),
/*shutting_down=*/nullptr,
/*preserve_deletes_seqnum=*/0, /*manual_compaction_paused=*/nullptr,
/*manual_compaction_paused=*/nullptr,
/*manual_compaction_canceled=*/nullptr, ioptions->info_log,
&(cfd_->GetFullHistoryTsLow()));
@ -829,6 +831,7 @@ Status FlushJob::WriteLevel0Table() {
// TEST_SYNC_POINT_CALLBACK not used.
TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:num_memtables",
&mems_size);
assert(job_context_);
for (MemTable* m : mems_) {
ROCKS_LOG_INFO(
db_options_.info_log,
@ -911,16 +914,19 @@ Status FlushJob::WriteLevel0Table() {
TableFileCreationReason::kFlush, creation_time, oldest_key_time,
current_time, db_id_, db_session_id_, 0 /* target_file_size */,
meta_.fd.GetNumber());
const SequenceNumber job_snapshot_seq =
job_context_->GetJobSnapshotSequence();
s = BuildTable(
dbname_, versions_, db_options_, tboptions, file_options_,
cfd_->table_cache(), iter.get(), std::move(range_del_iters), &meta_,
&blob_file_additions, existing_snapshots_,
earliest_write_conflict_snapshot_, snapshot_checker_,
mutable_cf_options_.paranoid_file_checks, cfd_->internal_stats(),
&io_s, io_tracer_, BlobFileCreationReason::kFlush, event_logger_,
job_context_->job_id, Env::IO_HIGH, &table_properties_, write_hint,
full_history_ts_low, blob_callback_, &num_input_entries,
&memtable_payload_bytes, &memtable_garbage_bytes);
earliest_write_conflict_snapshot_, job_snapshot_seq,
snapshot_checker_, mutable_cf_options_.paranoid_file_checks,
cfd_->internal_stats(), &io_s, io_tracer_,
BlobFileCreationReason::kFlush, event_logger_, job_context_->job_id,
Env::IO_HIGH, &table_properties_, write_hint, full_history_ts_low,
blob_callback_, &num_input_entries, &memtable_payload_bytes,
&memtable_garbage_bytes);
// TODO: Cleanup io_status in BuildTable and table builders
assert(!s.ok() || io_s.ok());
io_s.PermitUncheckedError();

View File

@ -27,6 +27,7 @@
#include "rocksdb/system_clock.h"
#include "rocksdb/table.h"
#include "table/block_based/cachable_entry.h"
#include "util/hash_containers.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
@ -304,6 +305,8 @@ static const std::string num_blob_files = "num-blob-files";
static const std::string blob_stats = "blob-stats";
static const std::string total_blob_file_size = "total-blob-file-size";
static const std::string live_blob_file_size = "live-blob-file-size";
static const std::string live_blob_file_garbage_size =
"live-blob-file-garbage-size";
const std::string DB::Properties::kNumFilesAtLevelPrefix =
rocksdb_prefix + num_files_at_level_prefix;
@ -404,8 +407,10 @@ const std::string DB::Properties::kTotalBlobFileSize =
rocksdb_prefix + total_blob_file_size;
const std::string DB::Properties::kLiveBlobFileSize =
rocksdb_prefix + live_blob_file_size;
const std::string DB::Properties::kLiveBlobFileGarbageSize =
rocksdb_prefix + live_blob_file_garbage_size;
const std::unordered_map<std::string, DBPropertyInfo>
const UnorderedMap<std::string, DBPropertyInfo>
InternalStats::ppt_name_to_info = {
{DB::Properties::kNumFilesAtLevelPrefix,
{false, &InternalStats::HandleNumFilesAtLevel, nullptr, nullptr,
@ -562,6 +567,9 @@ const std::unordered_map<std::string, DBPropertyInfo>
{DB::Properties::kLiveBlobFileSize,
{false, nullptr, &InternalStats::HandleLiveBlobFileSize, nullptr,
nullptr}},
{DB::Properties::kLiveBlobFileGarbageSize,
{false, nullptr, &InternalStats::HandleLiveBlobFileGarbageSize,
nullptr, nullptr}},
};
InternalStats::InternalStats(int num_levels, SystemClock* clock,
@ -694,17 +702,21 @@ void InternalStats::CacheEntryRoleStats::ToMap(
std::map<std::string, std::string>* values, SystemClock* clock) const {
values->clear();
auto& v = *values;
v["id"] = cache_id;
v["capacity"] = ROCKSDB_NAMESPACE::ToString(cache_capacity);
v["secs_for_last_collection"] =
v[BlockCacheEntryStatsMapKeys::CacheId()] = cache_id;
v[BlockCacheEntryStatsMapKeys::CacheCapacityBytes()] =
ROCKSDB_NAMESPACE::ToString(cache_capacity);
v[BlockCacheEntryStatsMapKeys::LastCollectionDurationSeconds()] =
ROCKSDB_NAMESPACE::ToString(GetLastDurationMicros() / 1000000.0);
v["secs_since_last_collection"] = ROCKSDB_NAMESPACE::ToString(
(clock->NowMicros() - last_end_time_micros_) / 1000000U);
v[BlockCacheEntryStatsMapKeys::LastCollectionAgeSeconds()] =
ROCKSDB_NAMESPACE::ToString((clock->NowMicros() - last_end_time_micros_) /
1000000U);
for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
std::string role = kCacheEntryRoleToHyphenString[i];
v["count." + role] = ROCKSDB_NAMESPACE::ToString(entry_counts[i]);
v["bytes." + role] = ROCKSDB_NAMESPACE::ToString(total_charges[i]);
v["percent." + role] =
auto role = static_cast<CacheEntryRole>(i);
v[BlockCacheEntryStatsMapKeys::EntryCount(role)] =
ROCKSDB_NAMESPACE::ToString(entry_counts[i]);
v[BlockCacheEntryStatsMapKeys::UsedBytes(role)] =
ROCKSDB_NAMESPACE::ToString(total_charges[i]);
v[BlockCacheEntryStatsMapKeys::UsedPercent(role)] =
ROCKSDB_NAMESPACE::ToString(100.0 * total_charges[i] / cache_capacity);
}
}
@ -757,6 +769,7 @@ bool InternalStats::HandleLiveSstFilesSizeAtTemperature(std::string* value,
bool InternalStats::HandleNumBlobFiles(uint64_t* value, DBImpl* /*db*/,
Version* /*version*/) {
assert(value);
assert(cfd_);
const auto* current = cfd_->current();
@ -773,6 +786,7 @@ bool InternalStats::HandleNumBlobFiles(uint64_t* value, DBImpl* /*db*/,
}
bool InternalStats::HandleBlobStats(std::string* value, Slice /*suffix*/) {
assert(value);
assert(cfd_);
const auto* current = cfd_->current();
@ -797,6 +811,7 @@ bool InternalStats::HandleBlobStats(std::string* value, Slice /*suffix*/) {
bool InternalStats::HandleTotalBlobFileSize(uint64_t* value, DBImpl* /*db*/,
Version* /*version*/) {
assert(value);
assert(cfd_);
*value = cfd_->GetTotalBlobFileSize();
@ -806,6 +821,7 @@ bool InternalStats::HandleTotalBlobFileSize(uint64_t* value, DBImpl* /*db*/,
bool InternalStats::HandleLiveBlobFileSize(uint64_t* value, DBImpl* /*db*/,
Version* /*version*/) {
assert(value);
assert(cfd_);
const auto* current = cfd_->current();
@ -819,6 +835,23 @@ bool InternalStats::HandleLiveBlobFileSize(uint64_t* value, DBImpl* /*db*/,
return true;
}
bool InternalStats::HandleLiveBlobFileGarbageSize(uint64_t* value,
DBImpl* /*db*/,
Version* /*version*/) {
assert(value);
assert(cfd_);
const auto* current = cfd_->current();
assert(current);
const auto* vstorage = current->storage_info();
assert(vstorage);
*value = vstorage->GetBlobStats().total_garbage_size;
return true;
}
const DBPropertyInfo* GetPropertyInfo(const Slice& property) {
std::string ppt_name = GetPropertyNameAndArg(property).first.ToString();
auto ppt_info_iter = InternalStats::ppt_name_to_info.find(ppt_name);

View File

@ -18,6 +18,7 @@
#include "cache/cache_entry_roles.h"
#include "db/version_set.h"
#include "rocksdb/system_clock.h"
#include "util/hash_containers.h"
class ColumnFamilyData;
@ -387,7 +388,7 @@ class InternalStats {
SystemClock* clock) const;
private:
std::unordered_map<Cache::DeleterFn, CacheEntryRole> role_map_;
UnorderedMap<Cache::DeleterFn, CacheEntryRole> role_map_;
uint64_t GetLastDurationMicros() const;
};
@ -482,7 +483,7 @@ class InternalStats {
// Store a mapping from the user-facing DB::Properties string to our
// DBPropertyInfo struct used internally for retrieving properties.
static const std::unordered_map<std::string, DBPropertyInfo> ppt_name_to_info;
static const UnorderedMap<std::string, DBPropertyInfo> ppt_name_to_info;
private:
void DumpDBMapStats(std::map<std::string, std::string>* db_stats);
@ -690,6 +691,9 @@ class InternalStats {
bool HandleBlobStats(std::string* value, Slice suffix);
bool HandleTotalBlobFileSize(uint64_t* value, DBImpl* db, Version* version);
bool HandleLiveBlobFileSize(uint64_t* value, DBImpl* db, Version* version);
bool HandleLiveBlobFileGarbageSize(uint64_t* value, DBImpl* db,
Version* version);
// Total number of background errors encountered. Every time a flush task
// or compaction task fails, this counter is incremented. The failure can
// be caused by any possible reason, including file system errors, out of

View File

@ -124,6 +124,14 @@ struct JobContext {
job_snapshot != nullptr || sv_have_sth;
}
SequenceNumber GetJobSnapshotSequence() const {
if (job_snapshot) {
assert(job_snapshot->snapshot());
return job_snapshot->snapshot()->GetSequenceNumber();
}
return kMaxSequenceNumber;
}
// Structure to store information for candidate files to delete.
struct CandidateFileInfo {
std::string file_name;

View File

@ -13,7 +13,6 @@
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
@ -31,6 +30,7 @@
#include "table/multiget_context.h"
#include "util/dynamic_bloom.h"
#include "util/hash.h"
#include "util/hash_containers.h"
namespace ROCKSDB_NAMESPACE {
@ -566,7 +566,7 @@ class MemTable {
const SliceTransform* insert_with_hint_prefix_extractor_;
// Insert hints for each prefix.
std::unordered_map<Slice, void*, SliceHasher> insert_hints_;
UnorderedMapH<Slice, void*, SliceHasher> insert_hints_;
// Timestamp of oldest key
std::atomic<uint64_t> oldest_key_time_;

View File

@ -450,7 +450,7 @@ class Repairer {
dbname_, /* versions */ nullptr, immutable_db_options_, tboptions,
file_options_, table_cache_.get(), iter.get(),
std::move(range_del_iters), &meta, nullptr /* blob_file_additions */,
{}, kMaxSequenceNumber, snapshot_checker,
{}, kMaxSequenceNumber, kMaxSequenceNumber, snapshot_checker,
false /* paranoid_file_checks*/, nullptr /* internal_stats */, &io_s,
nullptr /*IOTracer*/, BlobFileCreationReason::kRecovery,
nullptr /* event_logger */, 0 /* job_id */, Env::IO_HIGH,
@ -565,10 +565,13 @@ class Repairer {
counter++;
t->meta.UpdateBoundaries(key, iter->value(), parsed.sequence,
status = t->meta.UpdateBoundaries(key, iter->value(), parsed.sequence,
parsed.type);
if (!status.ok()) {
break;
}
if (!iter->status().ok()) {
}
if (status.ok() && !iter->status().ok()) {
status = iter->status();
}
delete iter;

View File

@ -6,9 +6,12 @@
#ifndef ROCKSDB_LITE
#include "db/transaction_log_impl.h"
#include <cinttypes>
#include "db/write_batch_internal.h"
#include "file/sequence_file_reader.h"
#include "util/defer.h"
namespace ROCKSDB_NAMESPACE {
@ -24,16 +27,17 @@ TransactionLogIteratorImpl::TransactionLogIteratorImpl(
soptions_(soptions),
starting_sequence_number_(seq),
files_(std::move(files)),
versions_(versions),
seq_per_batch_(seq_per_batch),
io_tracer_(io_tracer),
started_(false),
is_valid_(false),
current_file_index_(0),
current_batch_seq_(0),
current_last_seq_(0),
versions_(versions),
seq_per_batch_(seq_per_batch),
io_tracer_(io_tracer) {
current_last_seq_(0) {
assert(files_ != nullptr);
assert(versions_ != nullptr);
assert(!seq_per_batch_);
current_status_.PermitUncheckedError(); // Clear on start
reporter_.env = options_->env;
reporter_.info_log = options_->info_log.get();
@ -94,8 +98,21 @@ void TransactionLogIteratorImpl::SeekToStartSequence(uint64_t start_file_index,
Slice record;
started_ = false;
is_valid_ = false;
// Check invariant of TransactionLogIterator when SeekToStartSequence()
// succeeds.
const Defer defer([this]() {
if (is_valid_) {
assert(current_status_.ok());
if (starting_sequence_number_ > current_batch_seq_) {
assert(current_batch_seq_ < current_last_seq_);
assert(current_last_seq_ >= starting_sequence_number_);
}
}
});
if (files_->size() <= start_file_index) {
return;
} else if (!current_status_.ok()) {
return;
}
Status s =
OpenLogReader(files_->at(static_cast<size_t>(start_file_index)).get());
@ -151,6 +168,9 @@ void TransactionLogIteratorImpl::SeekToStartSequence(uint64_t start_file_index,
}
void TransactionLogIteratorImpl::Next() {
if (!current_status_.ok()) {
return;
}
return NextImpl(false);
}
@ -159,7 +179,7 @@ void TransactionLogIteratorImpl::NextImpl(bool internal) {
is_valid_ = false;
if (!internal && !started_) {
// Runs every time until we can seek to the start sequence
return SeekToStartSequence();
SeekToStartSequence();
}
while(true) {
assert(current_log_reader_);
@ -249,55 +269,10 @@ void TransactionLogIteratorImpl::UpdateCurrentWriteBatch(const Slice& record) {
return SeekToStartSequence(current_file_index_, !seq_per_batch_);
}
struct BatchCounter : public WriteBatch::Handler {
SequenceNumber sequence_;
BatchCounter(SequenceNumber sequence) : sequence_(sequence) {}
Status MarkNoop(bool empty_batch) override {
if (!empty_batch) {
sequence_++;
}
return Status::OK();
}
Status MarkEndPrepare(const Slice&) override {
sequence_++;
return Status::OK();
}
Status MarkCommit(const Slice&) override {
sequence_++;
return Status::OK();
}
Status MarkCommitWithTimestamp(const Slice&, const Slice&) override {
++sequence_;
return Status::OK();
}
Status PutCF(uint32_t /*cf*/, const Slice& /*key*/,
const Slice& /*val*/) override {
return Status::OK();
}
Status DeleteCF(uint32_t /*cf*/, const Slice& /*key*/) override {
return Status::OK();
}
Status SingleDeleteCF(uint32_t /*cf*/, const Slice& /*key*/) override {
return Status::OK();
}
Status MergeCF(uint32_t /*cf*/, const Slice& /*key*/,
const Slice& /*val*/) override {
return Status::OK();
}
Status MarkBeginPrepare(bool) override { return Status::OK(); }
Status MarkRollback(const Slice&) override { return Status::OK(); }
};
current_batch_seq_ = WriteBatchInternal::Sequence(batch.get());
if (seq_per_batch_) {
BatchCounter counter(current_batch_seq_);
batch->Iterate(&counter);
current_last_seq_ = counter.sequence_;
} else {
assert(!seq_per_batch_);
current_last_seq_ =
current_batch_seq_ + WriteBatchInternal::Count(batch.get()) - 1;
}
// currentBatchSeq_ can only change here
assert(current_last_seq_ <= versions_->LastSequence());

View File

@ -81,6 +81,13 @@ class TransactionLogIteratorImpl : public TransactionLogIterator {
const EnvOptions& soptions_;
SequenceNumber starting_sequence_number_;
std::unique_ptr<VectorLogPtr> files_;
// Used only to get latest seq. num
// TODO(icanadi) can this be just a callback?
VersionSet const* const versions_;
const bool seq_per_batch_;
std::shared_ptr<IOTracer> io_tracer_;
// State variables
bool started_;
bool is_valid_; // not valid when it starts of.
Status current_status_;
@ -104,14 +111,11 @@ class TransactionLogIteratorImpl : public TransactionLogIterator {
SequenceNumber
current_batch_seq_; // sequence number at start of current batch
SequenceNumber current_last_seq_; // last sequence in the current batch
// Used only to get latest seq. num
// TODO(icanadi) can this be just a callback?
VersionSet const* const versions_;
const bool seq_per_batch_;
// Reads from transaction log only if the writebatch record has been written
bool RestrictedRead(Slice* record);
// Seeks to startingSequenceNumber reading from startFileIndex in files_.
// If strict is set,then must get a batch starting with startingSequenceNumber
// Seeks to starting_sequence_number_ reading from start_file_index in files_.
// If strict is set, then must get a batch starting with
// starting_sequence_number_.
void SeekToStartSequence(uint64_t start_file_index = 0, bool strict = false);
// Implementation of Next. SeekToStartSequence calls it internally with
// internal=true to let it find next entry even if it has to jump gaps because
@ -120,10 +124,9 @@ class TransactionLogIteratorImpl : public TransactionLogIterator {
void NextImpl(bool internal = false);
// Check if batch is expected, else return false
bool IsBatchExpected(const WriteBatch* batch, SequenceNumber expected_seq);
// Update current batch if a continuous batch is found, else return false
// Update current batch if a continuous batch is found.
void UpdateCurrentWriteBatch(const Slice& record);
Status OpenLogReader(const LogFile* file);
std::shared_ptr<IOTracer> io_tracer_;
};
} // namespace ROCKSDB_NAMESPACE
#endif // ROCKSDB_LITE

View File

@ -28,35 +28,19 @@ uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id) {
return number | (path_id * (kFileNumberMask + 1));
}
void FileMetaData::UpdateBoundaries(const Slice& key, const Slice& value,
Status FileMetaData::UpdateBoundaries(const Slice& key, const Slice& value,
SequenceNumber seqno,
ValueType value_type) {
if (smallest.size() == 0) {
smallest.DecodeFrom(key);
}
largest.DecodeFrom(key);
fd.smallest_seqno = std::min(fd.smallest_seqno, seqno);
fd.largest_seqno = std::max(fd.largest_seqno, seqno);
if (value_type == kTypeBlobIndex) {
BlobIndex blob_index;
const Status s = blob_index.DecodeFrom(value);
if (!s.ok()) {
return;
return s;
}
if (blob_index.IsInlined()) {
return;
}
if (blob_index.HasTTL()) {
return;
}
// Paranoid check: this should not happen because BlobDB numbers the blob
// files starting from 1.
if (!blob_index.IsInlined() && !blob_index.HasTTL()) {
if (blob_index.file_number() == kInvalidBlobFileNumber) {
return;
return Status::Corruption("Invalid blob file number");
}
if (oldest_blob_file_number == kInvalidBlobFileNumber ||
@ -64,6 +48,16 @@ void FileMetaData::UpdateBoundaries(const Slice& key, const Slice& value,
oldest_blob_file_number = blob_index.file_number();
}
}
}
if (smallest.size() == 0) {
smallest.DecodeFrom(key);
}
largest.DecodeFrom(key);
fd.smallest_seqno = std::min(fd.smallest_seqno, seqno);
fd.largest_seqno = std::max(fd.largest_seqno, seqno);
return Status::OK();
}
void VersionEdit::Clear() {

View File

@ -245,7 +245,7 @@ struct FileMetaData {
// REQUIRED: Keys must be given to the function in sorted order (it expects
// the last key to be the largest).
void UpdateBoundaries(const Slice& key, const Slice& value,
Status UpdateBoundaries(const Slice& key, const Slice& value,
SequenceNumber seqno, ValueType value_type);
// Unlike UpdateBoundaries, ranges do not need to be presented in any

View File

@ -9,6 +9,7 @@
#include "db/version_edit.h"
#include "db/blob/blob_index.h"
#include "rocksdb/advanced_options.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
@ -611,6 +612,128 @@ TEST_F(VersionEditTest, IgnorableTags) {
SyncPoint::GetInstance()->DisableProcessing();
}
TEST(FileMetaDataTest, UpdateBoundariesBlobIndex) {
FileMetaData meta;
{
constexpr uint64_t file_number = 10;
constexpr uint32_t path_id = 0;
constexpr uint64_t file_size = 0;
meta.fd = FileDescriptor(file_number, path_id, file_size);
}
constexpr char key[] = "foo";
constexpr uint64_t expected_oldest_blob_file_number = 20;
// Plain old value (does not affect oldest_blob_file_number)
{
constexpr char value[] = "value";
constexpr SequenceNumber seq = 200;
ASSERT_OK(meta.UpdateBoundaries(key, value, seq, kTypeValue));
ASSERT_EQ(meta.oldest_blob_file_number, kInvalidBlobFileNumber);
}
// Non-inlined, non-TTL blob index (sets oldest_blob_file_number)
{
constexpr uint64_t blob_file_number = 25;
static_assert(blob_file_number > expected_oldest_blob_file_number,
"unexpected");
constexpr uint64_t offset = 1000;
constexpr uint64_t size = 100;
std::string blob_index;
BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size,
kNoCompression);
constexpr SequenceNumber seq = 201;
ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex));
ASSERT_EQ(meta.oldest_blob_file_number, blob_file_number);
}
// Another one, with the oldest blob file number (updates
// oldest_blob_file_number)
{
constexpr uint64_t offset = 2000;
constexpr uint64_t size = 300;
std::string blob_index;
BlobIndex::EncodeBlob(&blob_index, expected_oldest_blob_file_number, offset,
size, kNoCompression);
constexpr SequenceNumber seq = 202;
ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex));
ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number);
}
// Inlined TTL blob index (does not affect oldest_blob_file_number)
{
constexpr uint64_t expiration = 9876543210;
constexpr char value[] = "value";
std::string blob_index;
BlobIndex::EncodeInlinedTTL(&blob_index, expiration, value);
constexpr SequenceNumber seq = 203;
ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex));
ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number);
}
// Non-inlined TTL blob index (does not affect oldest_blob_file_number, even
// though file number is smaller)
{
constexpr uint64_t expiration = 9876543210;
constexpr uint64_t blob_file_number = 15;
static_assert(blob_file_number < expected_oldest_blob_file_number,
"unexpected");
constexpr uint64_t offset = 2000;
constexpr uint64_t size = 500;
std::string blob_index;
BlobIndex::EncodeBlobTTL(&blob_index, expiration, blob_file_number, offset,
size, kNoCompression);
constexpr SequenceNumber seq = 204;
ASSERT_OK(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex));
ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number);
}
// Corrupt blob index
{
constexpr char corrupt_blob_index[] = "!corrupt!";
constexpr SequenceNumber seq = 205;
ASSERT_TRUE(
meta.UpdateBoundaries(key, corrupt_blob_index, seq, kTypeBlobIndex)
.IsCorruption());
ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number);
}
// Invalid blob file number
{
constexpr uint64_t offset = 10000;
constexpr uint64_t size = 1000;
std::string blob_index;
BlobIndex::EncodeBlob(&blob_index, kInvalidBlobFileNumber, offset, size,
kNoCompression);
constexpr SequenceNumber seq = 206;
ASSERT_TRUE(meta.UpdateBoundaries(key, blob_index, seq, kTypeBlobIndex)
.IsCorruption());
ASSERT_EQ(meta.oldest_blob_file_number, expected_oldest_blob_file_number);
}
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {

View File

@ -2073,6 +2073,9 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
if (is_blob_index) {
if (do_merge && value) {
TEST_SYNC_POINT_CALLBACK("Version::Get::TamperWithBlobIndex",
value);
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
constexpr uint64_t* bytes_read = nullptr;
@ -2300,6 +2303,9 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
if (iter->is_blob_index) {
if (iter->value) {
TEST_SYNC_POINT_CALLBACK("Version::MultiGet::TamperWithBlobIndex",
&(*iter));
const Slice& blob_index_slice = *(iter->value);
BlobIndex blob_index;
Status tmp_s = blob_index.DecodeFrom(blob_index_slice);

View File

@ -54,6 +54,7 @@
#include "table/get_context.h"
#include "table/multiget_context.h"
#include "trace_replay/block_cache_tracer.h"
#include "util/hash_containers.h"
namespace ROCKSDB_NAMESPACE {
@ -579,7 +580,7 @@ class VersionStorageInfo {
// Map of all table files in version. Maps file number to (level, position on
// level).
using FileLocations = std::unordered_map<uint64_t, FileLocation>;
using FileLocations = UnorderedMap<uint64_t, FileLocation>;
FileLocations file_locations_;
// Vector of blob files in version sorted by blob file number.
@ -1344,8 +1345,7 @@ class VersionSet {
protected:
using VersionBuilderMap =
std::unordered_map<uint32_t,
std::unique_ptr<BaseReferencedVersionBuilder>>;
UnorderedMap<uint32_t, std::unique_ptr<BaseReferencedVersionBuilder>>;
struct ManifestWriter;

View File

@ -105,6 +105,11 @@ Status WalManager::GetUpdatesSince(
SequenceNumber seq, std::unique_ptr<TransactionLogIterator>* iter,
const TransactionLogIterator::ReadOptions& read_options,
VersionSet* version_set) {
if (seq_per_batch_) {
return Status::NotSupported();
}
assert(!seq_per_batch_);
// Get all sorted Wal Files.
// Do binary search and open files and find the seq number.

View File

@ -113,7 +113,7 @@ class WalManager {
// obsolete files will be deleted every this seconds if ttl deletion is
// enabled and archive size_limit is disabled.
static const uint64_t kDefaultIntervalToDeleteObsoleteWAL = 600;
static constexpr uint64_t kDefaultIntervalToDeleteObsoleteWAL = 600;
std::shared_ptr<IOTracer> io_tracer_;
};

View File

@ -232,7 +232,7 @@ GEM
jekyll-seo-tag (~> 2.1)
minitest (5.15.0)
multipart-post (2.1.1)
nokogiri (1.13.3)
nokogiri (1.13.4)
mini_portile2 (~> 2.8.0)
racc (~> 1.4)
octokit (4.22.0)

View File

@ -241,9 +241,7 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts,
del_fn_ = nullptr;
}
// TODO akanksha: Update TEST_SYNC_POINT after Async APIs are merged with
// normal prefetching.
TEST_SYNC_POINT("FilePrefetchBuffer::Prefetch:Start");
TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsync:Start");
Status s;
size_t prefetch_size = length + readahead_size;
@ -475,7 +473,10 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
return false;
}
}
if (implicit_auto_readahead_ && async_io_) {
// async prefetching is enabled if it's implicit_auto_readahead_ or
// explicit readahead_size_ is passed along with ReadOptions.async_io =
// true.
if (async_io_) {
// Prefetch n + readahead_size_/2 synchronously as remaining
// readahead_size_/2 will be prefetched asynchronously.
s = PrefetchAsync(opts, reader, offset, n, readahead_size_ / 2,
@ -516,6 +517,16 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
void FilePrefetchBuffer::PrefetchAsyncCallback(const FSReadRequest& req,
void* /*cb_arg*/) {
uint32_t index = curr_ ^ 1;
#ifndef NDEBUG
if (req.result.size() < req.len) {
// Fake an IO error to force db_stress fault injection to ignore
// truncated read errors
IGNORE_STATUS_IF_ERROR(Status::IOError());
}
IGNORE_STATUS_IF_ERROR(req.status);
#endif
if (req.status.ok()) {
if (req.offset + req.result.size() <=
bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize()) {

View File

@ -36,7 +36,6 @@ struct BufferInfo {
class FilePrefetchBuffer {
public:
static const int kMinNumFileReadsToStartAutoReadahead = 2;
static const size_t kInitAutoReadaheadSize = 8 * 1024;
// Constructor.
//
@ -68,6 +67,7 @@ class FilePrefetchBuffer {
bool async_io = false, FileSystem* fs = nullptr)
: curr_(0),
readahead_size_(readahead_size),
initial_auto_readahead_size_(readahead_size),
max_readahead_size_(max_readahead_size),
min_offset_read_(port::kMaxSizet),
enable_(enable),
@ -184,9 +184,8 @@ class FilePrefetchBuffer {
bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) &&
IsBlockSequential(offset) &&
(num_file_reads_ + 1 > kMinNumFileReadsToStartAutoReadahead)) {
size_t initial_auto_readahead_size = kInitAutoReadaheadSize;
readahead_size_ =
std::max(initial_auto_readahead_size,
std::max(initial_auto_readahead_size_,
(readahead_size_ >= value ? readahead_size_ - value : 0));
}
}
@ -238,7 +237,7 @@ class FilePrefetchBuffer {
// Called in case of implicit auto prefetching.
void ResetValues() {
num_file_reads_ = 1;
readahead_size_ = kInitAutoReadaheadSize;
readahead_size_ = initial_auto_readahead_size_;
}
std::vector<BufferInfo> bufs_;
@ -246,6 +245,7 @@ class FilePrefetchBuffer {
// consumed currently.
uint32_t curr_;
size_t readahead_size_;
size_t initial_auto_readahead_size_;
// FilePrefetchBuffer object won't be created from Iterator flow if
// max_readahead_size_ = 0.
size_t max_readahead_size_;

View File

@ -275,8 +275,8 @@ TEST_P(PrefetchTest, ConfigureAutoMaxReadaheadSize) {
break;
case 1:
// max_auto_readahead_size is set less than
// BlockBasedTable::kInitAutoReadaheadSize. So readahead_size remains
// equal to max_auto_readahead_size.
// initial_auto_readahead_size. So readahead_size remains equal to
// max_auto_readahead_size.
ASSERT_OK(db_->SetOptions({{"block_based_table_factory",
"{max_auto_readahead_size=4096;}"}}));
break;
@ -321,6 +321,145 @@ TEST_P(PrefetchTest, ConfigureAutoMaxReadaheadSize) {
SyncPoint::GetInstance()->ClearAllCallBacks();
Close();
}
TEST_P(PrefetchTest, ConfigureInternalAutoReadaheadSize) {
// First param is if the mockFS support_prefetch or not
bool support_prefetch =
std::get<0>(GetParam()) &&
test::IsPrefetchSupported(env_->GetFileSystem(), dbname_);
// Second param is if directIO is enabled or not
bool use_direct_io = std::get<1>(GetParam());
std::shared_ptr<MockFS> fs =
std::make_shared<MockFS>(env_->GetFileSystem(), support_prefetch);
std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
Options options = CurrentOptions();
options.write_buffer_size = 1024;
options.create_if_missing = true;
options.compression = kNoCompression;
options.env = env.get();
options.disable_auto_compactions = true;
if (use_direct_io) {
options.use_direct_reads = true;
options.use_direct_io_for_flush_and_compaction = true;
}
BlockBasedTableOptions table_options;
table_options.no_block_cache = true;
table_options.cache_index_and_filter_blocks = false;
table_options.metadata_block_size = 1024;
table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
table_options.initial_auto_readahead_size = 0;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
int buff_prefetch_count = 0;
// DB open will create table readers unless we reduce the table cache
// capacity. SanitizeOptions will set max_open_files to minimum of 20.
// Table cache is allocated with max_open_files - 10 as capacity. So
// override max_open_files to 10 so table cache capacity will become 0.
// This will prevent file open during DB open and force the file to be
// opened during Iteration.
SyncPoint::GetInstance()->SetCallBack(
"SanitizeOptions::AfterChangeMaxOpenFiles", [&](void* arg) {
int* max_open_files = (int*)arg;
*max_open_files = 11;
});
SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start",
[&](void*) { buff_prefetch_count++; });
SyncPoint::GetInstance()->EnableProcessing();
SyncPoint::GetInstance()->EnableProcessing();
Status s = TryReopen(options);
if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) {
// If direct IO is not supported, skip the test
return;
} else {
ASSERT_OK(s);
}
Random rnd(309);
int key_count = 0;
const int num_keys_per_level = 100;
// Level 0 : Keys in range [0, 99], Level 1:[100, 199], Level 2:[200, 299].
for (int level = 2; level >= 0; level--) {
key_count = level * num_keys_per_level;
for (int i = 0; i < num_keys_per_level; ++i) {
ASSERT_OK(Put(Key(key_count++), rnd.RandomString(500)));
}
ASSERT_OK(Flush());
MoveFilesToLevel(level);
}
Close();
TryReopen(options);
{
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
fs->ClearPrefetchCount();
buff_prefetch_count = 0;
std::vector<int> buff_prefetch_level_count = {0, 0, 0};
for (int level = 2; level >= 0; level--) {
key_count = level * num_keys_per_level;
switch (level) {
case 0:
// initial_auto_readahead_size is set 0 so data and index blocks are
// not prefetched.
ASSERT_OK(db_->SetOptions({{"block_based_table_factory",
"{initial_auto_readahead_size=0;}"}}));
break;
case 1:
// intial_auto_readahead_size and max_auto_readahead_size are set same
// so readahead_size remains same.
ASSERT_OK(db_->SetOptions({{"block_based_table_factory",
"{initial_auto_readahead_size=4096;max_"
"auto_readahead_size=4096;}"}}));
break;
case 2:
ASSERT_OK(
db_->SetOptions({{"block_based_table_factory",
"{initial_auto_readahead_size=65536;}"}}));
break;
default:
assert(false);
}
for (int i = 0; i < num_keys_per_level; ++i) {
iter->Seek(Key(key_count++));
iter->Next();
}
buff_prefetch_level_count[level] = buff_prefetch_count;
if (support_prefetch && !use_direct_io) {
if (level == 0) {
ASSERT_FALSE(fs->IsPrefetchCalled());
} else {
ASSERT_TRUE(fs->IsPrefetchCalled());
}
fs->ClearPrefetchCount();
} else {
ASSERT_FALSE(fs->IsPrefetchCalled());
if (level == 0) {
ASSERT_EQ(buff_prefetch_count, 0);
} else {
ASSERT_GT(buff_prefetch_count, 0);
}
buff_prefetch_count = 0;
}
}
if (!support_prefetch) {
ASSERT_GT(buff_prefetch_level_count[1], buff_prefetch_level_count[2]);
}
}
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
Close();
}
#endif // !ROCKSDB_LITE
TEST_P(PrefetchTest, PrefetchWhenReseek) {
@ -730,6 +869,7 @@ TEST_P(PrefetchTest1, DBIterLevelReadAhead) {
}
MoveFilesToLevel(2);
int buff_prefetch_count = 0;
int buff_async_prefetch_count = 0;
int readahead_carry_over_count = 0;
int num_sst_files = NumTableFilesAtLevel(2);
size_t current_readahead_size = 0;
@ -740,6 +880,10 @@ TEST_P(PrefetchTest1, DBIterLevelReadAhead) {
"FilePrefetchBuffer::Prefetch:Start",
[&](void*) { buff_prefetch_count++; });
SyncPoint::GetInstance()->SetCallBack(
"FilePrefetchBuffer::PrefetchAsync:Start",
[&](void*) { buff_async_prefetch_count++; });
// The callback checks, since reads are sequential, readahead_size doesn't
// start from 8KB when iterator moves to next file and its called
// num_sst_files-1 times (excluding for first file).
@ -749,7 +893,6 @@ TEST_P(PrefetchTest1, DBIterLevelReadAhead) {
size_t readahead_size = *reinterpret_cast<size_t*>(arg);
if (readahead_carry_over_count) {
ASSERT_GT(readahead_size, 8 * 1024);
// ASSERT_GE(readahead_size, current_readahead_size);
}
});
@ -764,7 +907,6 @@ TEST_P(PrefetchTest1, DBIterLevelReadAhead) {
ReadOptions ro;
if (is_adaptive_readahead) {
ro.adaptive_readahead = true;
// TODO akanksha: Remove after adding new units.
ro.async_io = true;
}
@ -776,11 +918,13 @@ TEST_P(PrefetchTest1, DBIterLevelReadAhead) {
num_keys++;
}
ASSERT_EQ(num_keys, total_keys);
ASSERT_GT(buff_prefetch_count, 0);
// For index and data blocks.
if (is_adaptive_readahead) {
ASSERT_EQ(readahead_carry_over_count, 2 * (num_sst_files - 1));
ASSERT_GT(buff_async_prefetch_count, 0);
} else {
ASSERT_GT(buff_prefetch_count, 0);
ASSERT_EQ(readahead_carry_over_count, 0);
}
@ -858,7 +1002,8 @@ TEST_P(PrefetchTest2, NonSequentialReads) {
int set_readahead = 0;
size_t readahead_size = 0;
SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start",
SyncPoint::GetInstance()->SetCallBack(
"FilePrefetchBuffer::PrefetchAsync:Start",
[&](void*) { buff_prefetch_count++; });
SyncPoint::GetInstance()->SetCallBack(
"BlockPrefetcher::SetReadaheadState",
@ -953,7 +1098,8 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
size_t expected_current_readahead_size = 8 * 1024;
size_t decrease_readahead_size = 8 * 1024;
SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start",
SyncPoint::GetInstance()->SetCallBack(
"FilePrefetchBuffer::PrefetchAsync:Start",
[&](void*) { buff_prefetch_count++; });
SyncPoint::GetInstance()->SetCallBack(
"FilePrefetchBuffer::TryReadFromCache", [&](void* arg) {
@ -1043,8 +1189,17 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
extern "C" bool RocksDbIOUringEnable() { return true; }
class PrefetchTestWithPosix : public DBTestBase,
public ::testing::WithParamInterface<bool> {
public:
PrefetchTestWithPosix() : DBTestBase("prefetch_test_with_posix", true) {}
};
INSTANTIATE_TEST_CASE_P(PrefetchTestWithPosix, PrefetchTestWithPosix,
::testing::Bool());
// Tests the default implementation of ReadAsync API with PosixFileSystem.
TEST_F(PrefetchTest2, ReadAsyncWithPosixFS) {
TEST_P(PrefetchTestWithPosix, ReadAsyncWithPosixFS) {
if (mem_env_ || encrypted_env_) {
ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
return;
@ -1100,19 +1255,25 @@ TEST_F(PrefetchTest2, ReadAsyncWithPosixFS) {
int buff_prefetch_count = 0;
bool read_async_called = false;
SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start",
[&](void*) { buff_prefetch_count++; });
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"UpdateResults::io_uring_result",
[&](void* /*arg*/) { read_async_called = true; });
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
// Read the keys.
{
ReadOptions ro;
ro.adaptive_readahead = true;
ro.async_io = true;
if (GetParam()) {
ro.readahead_size = 16 * 1024;
}
SyncPoint::GetInstance()->SetCallBack(
"FilePrefetchBuffer::PrefetchAsync:Start",
[&](void*) { buff_prefetch_count++; });
SyncPoint::GetInstance()->SetCallBack(
"UpdateResults::io_uring_result",
[&](void* /*arg*/) { read_async_called = true; });
SyncPoint::GetInstance()->EnableProcessing();
// Read the keys.
{
ASSERT_OK(options.statistics->Reset());
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
int num_keys = 0;

View File

@ -160,7 +160,9 @@ class WritableFileWriter {
bool perform_data_verification_;
uint32_t buffered_data_crc32c_checksum_;
bool buffered_data_with_checksum_;
#ifndef ROCKSDB_LITE
Temperature temperature_;
#endif // ROCKSDB_LITE
public:
WritableFileWriter(
@ -191,8 +193,10 @@ class WritableFileWriter {
checksum_finalized_(false),
perform_data_verification_(perform_data_verification),
buffered_data_crc32c_checksum_(0),
buffered_data_with_checksum_(buffered_data_with_checksum),
temperature_(options.temperature) {
buffered_data_with_checksum_(buffered_data_with_checksum) {
#ifndef ROCKSDB_LITE
temperature_ = options.temperature;
#endif // ROCKSDB_LITE
assert(!use_direct_io() || max_buffer_size_ > 0);
TEST_SYNC_POINT_CALLBACK("WritableFileWriter::WritableFileWriter:0",
reinterpret_cast<void*>(max_buffer_size_));

View File

@ -1766,6 +1766,8 @@ extern ROCKSDB_LIBRARY_API void rocksdb_lru_cache_options_set_memory_allocator(
extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_lru(
size_t capacity);
extern ROCKSDB_LIBRARY_API rocksdb_cache_t*
rocksdb_cache_create_lru_with_strict_capacity_limit(size_t capacity);
extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_lru_opts(
rocksdb_lru_cache_options_t*);
extern ROCKSDB_LIBRARY_API void rocksdb_cache_destroy(rocksdb_cache_t* cache);

View File

@ -131,7 +131,7 @@ extern std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts);
// Options structure for configuring a SecondaryCache instance based on
// LRUCache. The LRUCacheOptions.secondary_cache is not used and
// should not be set.
struct LRUSecondaryCacheOptions : LRUCacheOptions {
struct CompressedSecondaryCacheOptions : LRUCacheOptions {
// The compression method (if any) that is used to compress data.
CompressionType compression_type = CompressionType::kLZ4Compression;
@ -142,8 +142,8 @@ struct LRUSecondaryCacheOptions : LRUCacheOptions {
// header in varint32 format.
uint32_t compress_format_version = 2;
LRUSecondaryCacheOptions() {}
LRUSecondaryCacheOptions(
CompressedSecondaryCacheOptions() {}
CompressedSecondaryCacheOptions(
size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
double _high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
@ -161,7 +161,7 @@ struct LRUSecondaryCacheOptions : LRUCacheOptions {
// EXPERIMENTAL
// Create a new Secondary Cache that is implemented on top of LRUCache.
extern std::shared_ptr<SecondaryCache> NewLRUSecondaryCache(
extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
size_t capacity, int num_shard_bits = -1,
bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
@ -171,8 +171,8 @@ extern std::shared_ptr<SecondaryCache> NewLRUSecondaryCache(
CompressionType compression_type = CompressionType::kLZ4Compression,
uint32_t compress_format_version = 2);
extern std::shared_ptr<SecondaryCache> NewLRUSecondaryCache(
const LRUSecondaryCacheOptions& opts);
extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
const CompressedSecondaryCacheOptions& opts);
// Similar to NewLRUCache, but create a cache based on CLOCK algorithm with
// better concurrent performance in some cases. See util/clock_cache.cc for
@ -540,4 +540,58 @@ class Cache {
std::shared_ptr<MemoryAllocator> memory_allocator_;
};
// Classifications of block cache entries.
//
// Developer notes: Adding a new enum to this class requires corresponding
// updates to `kCacheEntryRoleToCamelString` and
// `kCacheEntryRoleToHyphenString`. Do not add to this enum after `kMisc` since
// `kNumCacheEntryRoles` assumes `kMisc` comes last.
enum class CacheEntryRole {
// Block-based table data block
kDataBlock,
// Block-based table filter block (full or partitioned)
kFilterBlock,
// Block-based table metadata block for partitioned filter
kFilterMetaBlock,
// Block-based table deprecated filter block (old "block-based" filter)
kDeprecatedFilterBlock,
// Block-based table index block
kIndexBlock,
// Other kinds of block-based table block
kOtherBlock,
// WriteBufferManager reservations to account for memtable usage
kWriteBuffer,
// BlockBasedTableBuilder reservations to account for
// compression dictionary building buffer's memory usage
kCompressionDictionaryBuildingBuffer,
// Filter reservations to account for
// (new) bloom and ribbon filter construction's memory usage
kFilterConstruction,
// BlockBasedTableReader reservations to account for
// its memory usage
kBlockBasedTableReader,
// Default bucket, for miscellaneous cache entries. Do not use for
// entries that could potentially add up to large usage.
kMisc,
};
constexpr uint32_t kNumCacheEntryRoles =
static_cast<uint32_t>(CacheEntryRole::kMisc) + 1;
// Obtain a hyphen-separated, lowercase name of a `CacheEntryRole`.
const std::string& GetCacheEntryRoleName(CacheEntryRole);
// For use with `GetMapProperty()` for property
// `DB::Properties::kBlockCacheEntryStats`. On success, the map will
// be populated with all keys that can be obtained from these functions.
struct BlockCacheEntryStatsMapKeys {
static const std::string& CacheId();
static const std::string& CacheCapacityBytes();
static const std::string& LastCollectionDurationSeconds();
static const std::string& LastCollectionAgeSeconds();
static std::string EntryCount(CacheEntryRole);
static std::string UsedBytes(CacheEntryRole);
static std::string UsedPercent(CacheEntryRole);
};
} // namespace ROCKSDB_NAMESPACE

View File

@ -10,11 +10,13 @@
#include <stdint.h>
#include <stdio.h>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "rocksdb/iterator.h"
#include "rocksdb/listener.h"
#include "rocksdb/metadata.h"
@ -290,6 +292,12 @@ class DB {
const std::string& input, std::string* output,
const CompactionServiceOptionsOverride& override_options);
static Status OpenAndCompact(
const OpenAndCompactOptions& options, const std::string& name,
const std::string& output_directory, const std::string& input,
std::string* output,
const CompactionServiceOptionsOverride& override_options);
// Experimental and subject to change
// Open DB and trim data newer than specified timestamp.
// The trim_ts specified the user-defined timestamp trim bound.
@ -870,7 +878,9 @@ class DB {
static const std::string kLevelStats;
// "rocksdb.block-cache-entry-stats" - returns a multi-line string or
// map with statistics on block cache usage.
// map with statistics on block cache usage. See
// `BlockCacheEntryStatsMapKeys` for structured representation of keys
// available in the map form.
static const std::string kBlockCacheEntryStats;
// "rocksdb.num-immutable-mem-table" - returns number of immutable
@ -1060,6 +1070,10 @@ class DB {
// "rocksdb.live-blob-file-size" - returns the total size of all blob
// files in the current version.
static const std::string kLiveBlobFileSize;
// "rocksdb.live-blob-file-garbage-size" - returns the total amount of
// garbage in the blob files in the current version.
static const std::string kLiveBlobFileGarbageSize;
};
#endif /* ROCKSDB_LITE */
@ -1426,39 +1440,6 @@ class DB {
virtual Status EnableFileDeletions(bool force = true) = 0;
#ifndef ROCKSDB_LITE
// GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
// Retrieve the list of all files in the database. The files are
// relative to the dbname and are not absolute paths. Despite being relative
// paths, the file names begin with "/". The valid size of the manifest file
// is returned in manifest_file_size. The manifest file is an ever growing
// file, but only the portion specified by manifest_file_size is valid for
// this snapshot. Setting flush_memtable to true does Flush before recording
// the live files. Setting flush_memtable to false is useful when we don't
// want to wait for flush which may have to wait for compaction to complete
// taking an indeterminate time.
//
// In case you have multiple column families, even if flush_memtable is true,
// you still need to call GetSortedWalFiles after GetLiveFiles to compensate
// for new data that arrived to already-flushed column families while other
// column families were flushing
virtual Status GetLiveFiles(std::vector<std::string>&,
uint64_t* manifest_file_size,
bool flush_memtable = true) = 0;
// Retrieve the sorted list of all wal files with earliest file first
virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
// Retrieve information about the current wal file
//
// Note that the log might have rolled after this call in which case
// the current_log_file would not point to the current log file.
//
// Additionally, for the sake of optimization current_log_file->StartSequence
// would always be set to 0
virtual Status GetCurrentWalFile(
std::unique_ptr<LogFile>* current_log_file) = 0;
// Retrieves the creation time of the oldest file in the DB.
// This API only works if max_open_files = -1, if it is not then
// Status returned is Status::NotSupported()
@ -1472,9 +1453,12 @@ class DB {
virtual Status GetCreationTimeOfOldestFile(uint64_t* creation_time) = 0;
// Note: this API is not yet consistent with WritePrepared transactions.
// Sets iter to an iterator that is positioned at a write-batch containing
// seq_number. If the sequence number is non existent, it returns an iterator
// at the first available seq_no after the requested seq_no
//
// Sets iter to an iterator that is positioned at a write-batch whose
// sequence number range [start_seq, end_seq] covers seq_number. If no such
// write-batch exists, then iter is positioned at the next write-batch whose
// start_seq > seq_number.
//
// Returns Status::OK if iterator is valid
// Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
// use this api, else the WAL files will get
@ -1500,26 +1484,30 @@ class DB {
// path relative to the db directory. eg. 000001.sst, /archive/000003.log
virtual Status DeleteFile(std::string name) = 0;
// Returns a list of all table files with their level, start key
// and end key
// Obtains a list of all live table (SST) files and how they fit into the
// LSM-trees, such as column family, level, key range, etc.
// This builds a de-normalized form of GetAllColumnFamilyMetaData().
// For information about all files in a DB, use GetLiveFilesStorageInfo().
virtual void GetLiveFilesMetaData(
std::vector<LiveFileMetaData>* /*metadata*/) {}
// Return a list of all table and blob files checksum info.
// Return a list of all table (SST) and blob files checksum info.
// Note: This function might be of limited use because it cannot be
// synchronized with GetLiveFiles.
// synchronized with other "live files" APIs. GetLiveFilesStorageInfo()
// is recommended instead.
virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;
// EXPERIMENTAL: This function is not yet feature-complete.
// Get information about all live files that make up a DB, for making
// live copies (Checkpoint, backups, etc.) or other storage-related purposes.
// Use DisableFileDeletions() before and EnableFileDeletions() after to
// preserve the files for live copy.
// If creating a live copy, use DisableFileDeletions() before and
// EnableFileDeletions() after to prevent deletions.
// For LSM-tree metadata, use Get*MetaData() functions instead.
virtual Status GetLiveFilesStorageInfo(
const LiveFilesStorageInfoOptions& opts,
std::vector<LiveFileStorageInfo>* files) = 0;
// Obtains the meta data of the specified column family of the DB.
// Obtains the LSM-tree meta data of the specified column family of the DB,
// including metadata for each live table (SST) file in that column family.
virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
ColumnFamilyMetaData* /*metadata*/) {}
@ -1528,12 +1516,43 @@ class DB {
GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
}
// Obtains the meta data of all column families for the DB.
// The returned map contains one entry for each column family indexed by the
// name of the column family.
// Obtains the LSM-tree meta data of all column families of the DB,
// including metadata for each live table (SST) file in the DB.
virtual void GetAllColumnFamilyMetaData(
std::vector<ColumnFamilyMetaData>* /*metadata*/) {}
// Retrieve the list of all files in the database except WAL files. The files
// are relative to the dbname (or db_paths/cf_paths), not absolute paths.
// (Not recommended with db_paths/cf_paths because that information is not
// returned.) Despite being relative paths, the file names begin with "/".
// The valid size of the manifest file is returned in manifest_file_size.
// The manifest file is an ever growing file, but only the portion specified
// by manifest_file_size is valid for this snapshot. Setting flush_memtable
// to true does Flush before recording the live files. Setting flush_memtable
// to false is useful when we don't want to wait for flush which may have to
// wait for compaction to complete taking an indeterminate time.
//
// NOTE: Although GetLiveFiles() followed by GetSortedWalFiles() can generate
// a lossless backup, GetLiveFilesStorageInfo() is strongly recommended
// instead, because it ensures a single consistent view of all files is
// captured in one call.
virtual Status GetLiveFiles(std::vector<std::string>&,
uint64_t* manifest_file_size,
bool flush_memtable = true) = 0;
// Retrieve the sorted list of all wal files with earliest file first
virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
// Retrieve information about the current wal file
//
// Note that the log might have rolled after this call in which case
// the current_log_file would not point to the current log file.
//
// Additionally, for the sake of optimization current_log_file->StartSequence
// would always be set to 0
virtual Status GetCurrentWalFile(
std::unique_ptr<LogFile>* current_log_file) = 0;
// IngestExternalFile() will load a list of external SST files (1) into the DB
// Two primary modes are supported:
// - Duplicate keys in the new files will overwrite exiting keys (default)
@ -1551,6 +1570,11 @@ class DB {
// (3) If IngestExternalFileOptions->ingest_behind is set to true,
// we always ingest at the bottommost level, which should be reserved
// for this purpose (see DBOPtions::allow_ingest_behind flag).
// (4) If IngestExternalFileOptions->fail_if_not_bottommost_level is set to
// true, then this method can return Status:TryAgain() indicating that
// the files cannot be ingested to the bottommost level, and it is the
// user's responsibility to clear the bottommost level in the overlapping
// range before re-attempting the ingestion.
virtual Status IngestExternalFile(
ColumnFamilyHandle* column_family,
const std::vector<std::string>& external_files,

View File

@ -1855,6 +1855,14 @@ struct IngestExternalFileOptions {
// ingestion. However, if no checksum information is provided with the
// ingested files, DB will generate the checksum and store in the Manifest.
bool verify_file_checksum = true;
// Set to TRUE if user wants file to be ingested to the bottommost level. An
// error of Status::TryAgain() will be returned if a file cannot fit in the
// bottommost level when calling
// DB::IngestExternalFile()/DB::IngestExternalFiles(). The user should clear
// the bottommost level in the overlapping range before re-attempt.
//
// ingest_behind takes precedence over fail_if_not_bottommost_level.
bool fail_if_not_bottommost_level = false;
};
enum TraceFilterType : uint64_t {
@ -1932,12 +1940,25 @@ struct CompactionServiceOptionsOverride {
std::shared_ptr<TableFactory> table_factory;
std::shared_ptr<SstPartitionerFactory> sst_partitioner_factory = nullptr;
// Only subsets of events are triggered in remote compaction worker, like:
// `OnTableFileCreated`, `OnTableFileCreationStarted`,
// `ShouldBeNotifiedOnFileIO` `OnSubcompactionBegin`,
// `OnSubcompactionCompleted`, etc. Worth mentioning, `OnCompactionBegin` and
// `OnCompactionCompleted` won't be triggered. They will be triggered on the
// primary DB side.
std::vector<std::shared_ptr<EventListener>> listeners;
// statistics is used to collect DB operation metrics, the metrics won't be
// returned to CompactionService primary host, to collect that, the user needs
// to set it here.
std::shared_ptr<Statistics> statistics = nullptr;
};
struct OpenAndCompactOptions {
// Allows cancellation of an in-progress compaction.
std::atomic<bool>* canceled = nullptr;
};
#ifndef ROCKSDB_LITE
struct LiveFilesStorageInfoOptions {
// Whether to populate FileStorageInfo::file_checksum* or leave blank

View File

@ -68,9 +68,11 @@ class SecondaryCache : public Customizable {
// Lookup the data for the given key in this cache. The create_cb
// will be used to create the object. The handle returned may not be
// ready yet, unless wait=true, in which case Lookup() will block until
// the handle is ready
// the handle is ready. is_in_sec_cache is to indicate whether the
// handle is possibly erased from the secondary cache after the Lookup.
virtual std::unique_ptr<SecondaryCacheResultHandle> Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool wait) = 0;
const Slice& key, const Cache::CreateCallback& create_cb, bool wait,
bool& is_in_sec_cache) = 0;
// At the discretion of the implementation, erase the data associated
// with key

View File

@ -501,14 +501,15 @@ struct BlockBasedTableOptions {
// RocksDB does auto-readahead for iterators on noticing more than two reads
// for a table file if user doesn't provide readahead_size. The readahead
// starts at 8KB and doubles on every additional read upto
// max_auto_readahead_size and max_auto_readahead_size can be configured.
// starts at BlockBasedTableOptions.initial_auto_readahead_size (default: 8KB)
// and doubles on every additional read upto max_auto_readahead_size and
// max_auto_readahead_size can be configured.
//
// Special Value: 0 - If max_auto_readahead_size is set 0 then no implicit
// auto prefetching will be done. If max_auto_readahead_size provided is less
// than 8KB (which is initial readahead size used by rocksdb in case of
// auto-readahead), readahead size will remain same as
// max_auto_readahead_size.
// Special Value: 0 - If max_auto_readahead_size is set 0 then it will disable
// the implicit auto prefetching.
// If max_auto_readahead_size provided is less
// than initial_auto_readahead_size, then RocksDB will sanitize the
// initial_auto_readahead_size and set it to max_auto_readahead_size.
//
// Value should be provided along with KB i.e. 256 * 1024 as it will prefetch
// the blocks.
@ -547,6 +548,35 @@ struct BlockBasedTableOptions {
PrepopulateBlockCache prepopulate_block_cache =
PrepopulateBlockCache::kDisable;
// RocksDB does auto-readahead for iterators on noticing more than two reads
// for a table file if user doesn't provide readahead_size. The readahead size
// starts at initial_auto_readahead_size and doubles on every additional read
// upto BlockBasedTableOptions.max_auto_readahead_size.
// max_auto_readahead_size can also be configured.
//
// Scenarios:
// - If initial_auto_readahead_size is set 0 then it will disabled the
// implicit auto prefetching irrespective of max_auto_readahead_size.
// - If max_auto_readahead_size is set 0, it will disable the internal
// prefetching irrespective of initial_auto_readahead_size.
// - If initial_auto_readahead_size > max_auto_readahead_size, then RocksDB
// will sanitize the value of initial_auto_readahead_size to
// max_auto_readahead_size and readahead_size will be
// max_auto_readahead_size.
//
// Value should be provided along with KB i.e. 8 * 1024 as it will prefetch
// the blocks.
//
// This parameter can be changed dynamically by
// DB::SetOptions({{"block_based_table_factory",
// "{initial_auto_readahead_size=0;}"}}));
//
// Changing the value dynamically will only affect files opened after the
// change.
//
// Default: 8 KB (8 * 1024).
size_t initial_auto_readahead_size = 8 * 1024;
};
// Table Properties that are specific to block-based table properties.

View File

@ -0,0 +1,138 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <string>
#include <vector>
#include "rocksdb/merge_operator.h"
#include "rocksdb/slice.h"
namespace ROCKSDB_NAMESPACE {
// The feature is still in development so the encoding format is subject
// to change.
//
// Aggregation Merge Operator is a merge operator that allows users to
// aggregate merge operands of different keys with different registered
// aggregation functions. The aggregation can also change for the same
// key if the functions store the data in the same format.
// The target application highly overlaps with merge operator in general
// but we try to provide a better interface so that users are more likely
// to use pre-implemented plug-in functions and connect with existing
// third-party aggregation functions (such as those from SQL engines).
// In this case, the need for users to write customized C++ plug-in code
// is reduced.
// If the idea proves to useful, we might consider to move it to be
// a core functionality of RocksDB, and reduce the support of merge
// operators.
//
// Users can implement aggregation functions by implementing abstract
// class Aggregator, and register it using AddAggregator().
// The merge operator can be retrieved from GetAggMergeOperator() and
// it is a singleton.
//
// Users can push values to be updated with a merge operand encoded with
// registered function name and payload using EncodeAggFuncAndPayload(),
// and the merge operator will invoke the aggregation function.
// An example:
//
// // Assume class ExampleSumAggregator is implemented to do simple sum.
// AddAggregator("sum", std::make_unique<ExampleSumAggregator>());
// std::shared_ptr<MergeOperator> mp_guard = CreateAggMergeOperator();
// options.merge_operator = mp_guard.get();
// ...... // Creating DB
//
//
// std::string encoded_value;
// s = EncodeAggFuncAndPayload(kUnamedFuncName, "200", encoded_value);
// assert(s.ok());
// db->Put(WriteOptions(), "foo", encoded_value);
// s = EncodeAggFuncAndPayload("sum", "200", encoded_value);
// assert(s.ok());
// db->Merge(WriteOptions(), "foo", encoded_value);
// s = EncodeAggFuncAndPayload("sum", "200", encoded_value);
// assert(s.ok());
// db->Merge(WriteOptions(), "foo", encoded_value);
//
// std::string value;
// Status s = db->Get(ReadOptions, "foo", &value);
// assert(s.ok());
// Slice func, aggregated_value;
// assert(ExtractAggFuncAndValue(value, func, aggregated_value));
// assert(func == "sum");
// assert(aggregated_value == "600");
//
//
// DB::Put() can also be used to add a payloadin the same way as Merge().
//
// kUnamedFuncName can be used as a placeholder function name. This will
// be aggregated with merge operands inserted later based on function
// name given there.
//
// If the aggregation function is not registered or there is an error
// returned by aggregation function, the result will be encoded with a fake
// aggregation function kErrorFuncName, with each merge operands to be encoded
// into a list that can be extracted using ExtractList();
//
// If users add a merge operand using a different aggregation function from
// the previous one, the merge operands for the previous one is aggregated
// and the payload part of the result is treated as the first payload of
// the items for the new aggregation function. For example, users can
// Merge("plus, 1"), merge("plus 2"), merge("minus 3") and the aggregation
// result would be "minus 0".
//
// A class used to aggregate data per key/value. The plug-in function is
// implemented and registered using AddAggregator(). And then use it
// with merge operator created using CreateAggMergeOperator().
class Aggregator {
public:
virtual ~Aggregator() {}
// The input list is in reverse insertion order, with values[0] to be
// the one inserted last and values.back() to be the one inserted first.
// The oldest one might be from Get().
// Return whether aggregation succeeded. False for aggregation error.
virtual bool Aggregate(const std::vector<Slice>& values,
std::string& result) const = 0;
// True if a partial aggregation should be invoked. Some aggregators
// might opt to skip partial aggregation if possible.
virtual bool DoPartialAggregate() const { return true; }
};
// The function adds aggregation plugin by function name. It is used
// by all the aggregation operator created using CreateAggMergeOperator().
// It's currently not thread safe to run concurrently with the aggregation
// merge operator. It is recommended that all the aggregation function
// is added before calling CreateAggMergeOperator().
Status AddAggregator(const std::string& function_name,
std::unique_ptr<Aggregator>&& agg);
// Get the singleton instance of merge operator for aggregation.
// Always the same one is returned with a shared_ptr is hold as a
// static variable by the function.
// This is done so because options.merge_operator is shared_ptr.
std::shared_ptr<MergeOperator> GetAggMergeOperator();
// Encode aggregation function and payload that can be consumed by aggregation
// merge operator.
Status EncodeAggFuncAndPayload(const Slice& function_name, const Slice& payload,
std::string& output);
// Helper function to extract aggregation function name and payload.
// Return false if it fails to decode.
bool ExtractAggFuncAndValue(const Slice& op, Slice& func, Slice& value);
// Extract encoded list. This can be used to extract error merge operands when
// the returned function name is kErrorFuncName.
bool ExtractList(const Slice& encoded_list, std::vector<Slice>& decoded_list);
// Special function name that allows it to be merged to subsequent type.
extern const std::string kUnnamedFuncName;
// Special error function name reserved for merging or aggregation error.
extern const std::string kErrorFuncName;
} // namespace ROCKSDB_NAMESPACE

View File

@ -69,6 +69,8 @@ class LDBCommand {
static const std::string ARG_BLOB_GARBAGE_COLLECTION_AGE_CUTOFF;
static const std::string ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD;
static const std::string ARG_BLOB_COMPACTION_READAHEAD_SIZE;
static const std::string ARG_DECODE_BLOB_INDEX;
static const std::string ARG_DUMP_UNCOMPRESSED_BLOBS;
struct ParsedParams {
std::string cmd;

View File

@ -280,9 +280,7 @@ class ObjectRegistry {
static std::shared_ptr<ObjectRegistry> Default();
explicit ObjectRegistry(const std::shared_ptr<ObjectRegistry>& parent)
: parent_(parent) {}
explicit ObjectRegistry(const std::shared_ptr<ObjectLibrary>& library) {
libraries_.push_back(library);
}
explicit ObjectRegistry(const std::shared_ptr<ObjectLibrary>& library);
std::shared_ptr<ObjectLibrary> AddLibrary(const std::string& id) {
auto library = std::make_shared<ObjectLibrary>(id);
@ -502,6 +500,9 @@ class ObjectRegistry {
// Dump the contents of the registry to the logger
void Dump(Logger* logger) const;
// Invokes the input function to retrieve the properties for this plugin.
int RegisterPlugin(const std::string& name, const RegistrarFunc& func);
private:
static std::string ToManagedObjectKey(const std::string& type,
const std::string& id) {
@ -548,6 +549,8 @@ class ObjectRegistry {
// The libraries are searched in reverse order (back to front) when
// searching for entries.
std::vector<std::shared_ptr<ObjectLibrary>> libraries_;
std::vector<std::string> plugins_;
static std::unordered_map<std::string, RegistrarFunc> builtins_;
std::map<std::string, std::weak_ptr<Customizable>> managed_objects_;
std::shared_ptr<ObjectRegistry> parent_;
mutable std::mutex objects_mutex_; // Mutex for managed objects

View File

@ -222,6 +222,20 @@ struct TransactionDBOptions {
// pending writes into the database. A value of 0 or less means no limit.
int64_t default_write_batch_flush_threshold = 0;
// This option is valid only for write-prepared/write-unprepared. Transaction
// will rely on this callback to determine if a key should be rolled back
// with Delete or SingleDelete when necessary. If the callback returns true,
// then SingleDelete should be used. If the callback is not callable or the
// callback returns false, then a Delete is used.
// The application should ensure thread-safety of this callback.
// The callback should not throw because RocksDB is not exception-safe.
// The callback may be removed if we allow mixing Delete and SingleDelete in
// the future.
std::function<bool(TransactionDB* /*db*/,
ColumnFamilyHandle* /*column_family*/,
const Slice& /*key*/)>
rollback_deletion_type_callback;
private:
// 128 entries
// Should the default value change, please also update wp_snapshot_cache_bits

View File

@ -9,8 +9,10 @@
#include "rocksdb/rocksdb_namespace.h"
// NOTE: in 'main' development branch, this should be the *next*
// minor or major version number planned for release.
#define ROCKSDB_MAJOR 7
#define ROCKSDB_MINOR 1
#define ROCKSDB_MINOR 3
#define ROCKSDB_PATCH 0
// Do not use these. We made the mistake of declaring macros starting with

View File

@ -1428,7 +1428,7 @@ public class RocksDBTest {
assertThat(livefiles.manifestFileSize).isEqualTo(59);
assertThat(livefiles.files.size()).isEqualTo(3);
assertThat(livefiles.files.get(0)).isEqualTo("/CURRENT");
assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000004");
assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000005");
assertThat(livefiles.files.get(2)).isEqualTo("/OPTIONS-000007");
}
}

View File

@ -7,6 +7,9 @@
#include <cstddef>
#include <unordered_map>
#ifdef USE_FOLLY
#include <folly/container/F14Map.h>
#endif
#include "rocksdb/rocksdb_namespace.h"
@ -25,4 +28,11 @@ size_t ApproximateMemoryUsage(
umap.bucket_count() * sizeof(void*);
}
#ifdef USE_FOLLY
template <class Key, class Value, class Hash>
size_t ApproximateMemoryUsage(const folly::F14FastMap<Key, Value, Hash>& umap) {
return sizeof(umap) + umap.getAllocatedMemorySize();
}
#endif
} // namespace ROCKSDB_NAMESPACE

View File

@ -604,10 +604,14 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
// writing to all three cf, flush default cf
// LogNumbers: default: 14, stats: 4, pikachu: 4
// LogNumbers: default: 16, stats: 10, pikachu: 5
// Since in recovery process, cfd_stats column is created after WAL is
// created, synced and MANIFEST is persisted, its log number which depends on
// logfile_number_ will be different. Since "pikachu" is never flushed, thus
// its log_number should be the smallest of the three.
ASSERT_OK(Flush());
ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber());
ASSERT_LT(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber());
ASSERT_LT(cfd_test->GetLogNumber(), cfd_stats->GetLogNumber());
ASSERT_LT(cfd_test->GetLogNumber(), cfd_default->GetLogNumber());
ASSERT_OK(Put("foo1", "v1"));
ASSERT_OK(Put("bar1", "v1"));

View File

@ -1329,7 +1329,8 @@ class TestSecondaryCache : public SecondaryCache {
}
std::unique_ptr<SecondaryCacheResultHandle> Lookup(
const Slice& /*key*/, const Cache::CreateCallback& /*create_cb*/,
bool /*wait*/) override {
bool /*wait*/, bool& is_in_sec_cache) override {
is_in_sec_cache = true;
return nullptr;
}
void Erase(const Slice& /*key*/) override {}

View File

@ -34,6 +34,7 @@ namespace ROCKSDB_NAMESPACE {
#ifndef ROCKSDB_LITE
#if defined OS_LINUX || defined OS_WIN
#ifndef __clang__
#ifndef ROCKSDB_UBSAN_RUN
class OptionsSettableTest : public testing::Test {
public:
@ -195,7 +196,8 @@ TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
"enable_index_compression=false;"
"block_align=true;"
"max_auto_readahead_size=0;"
"prepopulate_block_cache=kDisable",
"prepopulate_block_cache=kDisable;"
"initial_auto_readahead_size=0",
new_bbto));
ASSERT_EQ(unset_bytes_base,
@ -580,6 +582,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
delete[] mcfo2_ptr;
delete[] cfo_clean_ptr;
}
#endif // !ROCKSDB_UBSAN_RUN
#endif // !__clang__
#endif // OS_LINUX || OS_WIN
#endif // !ROCKSDB_LITE

View File

@ -15,6 +15,10 @@
#endif
#endif
#define DECLARE_DEFAULT_MOVES(Name) \
Name(Name&&) noexcept = default; \
Name& operator=(Name&&) = default
// ASAN (Address sanitizer)
#if defined(__clang__)

16
src.mk
View File

@ -6,7 +6,7 @@ LIB_SOURCES = \
cache/cache_reservation_manager.cc \
cache/clock_cache.cc \
cache/lru_cache.cc \
cache/lru_secondary_cache.cc \
cache/compressed_secondary_cache.cc \
cache/sharded_cache.cc \
db/arena_wrapped_db_iter.cc \
db/blob/blob_fetcher.cc \
@ -232,6 +232,7 @@ LIB_SOURCES = \
util/thread_local.cc \
util/threadpool_imp.cc \
util/xxhash.cc \
utilities/agg_merge/agg_merge.cc \
utilities/backup/backup_engine.cc \
utilities/blob_db/blob_compaction_filter.cc \
utilities/blob_db/blob_db.cc \
@ -364,14 +365,14 @@ TEST_LIB_SOURCES = \
test_util/mock_time_env.cc \
test_util/testharness.cc \
test_util/testutil.cc \
utilities/agg_merge/test_agg_merge.cc \
utilities/cassandra/test_utils.cc \
FOLLY_SOURCES = \
third-party/folly/folly/detail/Futex.cpp \
third-party/folly/folly/synchronization/AtomicNotification.cpp \
third-party/folly/folly/synchronization/DistributedMutex.cpp \
third-party/folly/folly/synchronization/ParkingLot.cpp \
third-party/folly/folly/synchronization/WaitOptions.cpp \
$(FOLLY_DIR)/folly/container/detail/F14Table.cpp \
$(FOLLY_DIR)/folly/lang/SafeAssert.cpp \
$(FOLLY_DIR)/folly/lang/ToAscii.cpp \
$(FOLLY_DIR)/folly/ScopeGuard.cpp \
TOOLS_MAIN_SOURCES = \
db_stress_tool/db_stress.cc \
@ -402,7 +403,7 @@ TEST_MAIN_SOURCES = \
cache/cache_test.cc \
cache/cache_reservation_manager_test.cc \
cache/lru_cache_test.cc \
cache/lru_secondary_cache_test.cc \
cache/compressed_secondary_cache_test.cc \
db/blob/blob_counting_iterator_test.cc \
db/blob/blob_file_addition_test.cc \
db/blob/blob_file_builder_test.cc \
@ -560,6 +561,7 @@ TEST_MAIN_SOURCES = \
util/thread_list_test.cc \
util/thread_local_test.cc \
util/work_queue_test.cc \
utilities/agg_merge/agg_merge_test.cc \
utilities/backup/backup_engine_test.cc \
utilities/blob_db/blob_db_test.cc \
utilities/cassandra/cassandra_format_test.cc \

View File

@ -413,6 +413,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
offsetof(struct BlockBasedTableOptions, prepopulate_block_cache),
&block_base_table_prepopulate_block_cache_string_map,
OptionTypeFlags::kMutable)},
{"initial_auto_readahead_size",
{offsetof(struct BlockBasedTableOptions, initial_auto_readahead_size),
OptionType::kSizeT, OptionVerificationType::kNormal,
OptionTypeFlags::kMutable}},
#endif // ROCKSDB_LITE
};
@ -815,6 +819,10 @@ std::string BlockBasedTableFactory::GetPrintableOptions() const {
snprintf(buffer, kBufferSize, " prepopulate_block_cache: %d\n",
static_cast<int>(table_options_.prepopulate_block_cache));
ret.append(buffer);
snprintf(buffer, kBufferSize,
" initial_auto_readahead_size: %" ROCKSDB_PRIszt "\n",
table_options_.initial_auto_readahead_size);
ret.append(buffer);
return ret;
}

Some files were not shown because too many files have changed in this diff Show More