Compare commits

No commits in common. "main" and "yuslepukhin" have entirely different histories.

1350 changed files with 71681 additions and 226184 deletions

@@ -3,549 +3,80 @@ version: 2.1
orbs:
win: circleci/windows@2.4.0
aliases:
- &notify-on-main-failure
fail_only: true
only_for_branches: main
commands:
install-cmake-on-macos:
steps:
- run:
name: Install cmake on macos
command: |
HOMEBREW_NO_AUTO_UPDATE=1 brew install cmake
install-jdk8-on-macos:
steps:
- run:
name: Install JDK 8 on macos
command: |
brew install --cask adoptopenjdk/openjdk/adoptopenjdk8
increase-max-open-files-on-macos:
steps:
- run:
name: Increase max open files
command: |
sudo sysctl -w kern.maxfiles=1048576
sudo sysctl -w kern.maxfilesperproc=1048576
sudo launchctl limit maxfiles 1048576
pre-steps:
steps:
- checkout
- run:
name: Setup Environment Variables
command: |
echo "export GTEST_THROW_ON_FAILURE=0" >> $BASH_ENV
echo "export GTEST_OUTPUT=\"xml:/tmp/test-results/\"" >> $BASH_ENV
echo "export SKIP_FORMAT_BUCK_CHECKS=1" >> $BASH_ENV
echo "export GTEST_COLOR=1" >> $BASH_ENV
echo "export CTEST_OUTPUT_ON_FAILURE=1" >> $BASH_ENV
echo "export CTEST_TEST_TIMEOUT=300" >> $BASH_ENV
echo "export ZLIB_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zlib" >> $BASH_ENV
echo "export BZIP2_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/bzip2" >> $BASH_ENV
echo "export SNAPPY_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/snappy" >> $BASH_ENV
echo "export LZ4_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/lz4" >> $BASH_ENV
echo "export ZSTD_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zstd" >> $BASH_ENV
pre-steps-macos:
steps:
- pre-steps
post-steps:
steps:
- store_test_results: # store test result if there's any
path: /tmp/test-results
- store_artifacts: # store LOG for debugging if there's any
path: LOG
- run: # on fail, compress Test Logs for diagnosing the issue
name: Compress Test Logs
command: tar -cvzf t.tar.gz t
when: on_fail
- store_artifacts: # on fail, store Test Logs for diagnosing the issue
path: t.tar.gz
destination: test_logs
when: on_fail
install-clang-10:
steps:
- run:
name: Install Clang 10
command: |
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-10 main" | sudo tee -a /etc/apt/sources.list
echo "deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-10 main" | sudo tee -a /etc/apt/sources.list
echo "APT::Acquire::Retries \"10\";" | sudo tee -a /etc/apt/apt.conf.d/80-retries # llvm.org unreliable
sudo apt-get update -y && sudo apt-get install -y clang-10
install-clang-13:
steps:
- run:
name: Install Clang 13
command: |
echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee -a /etc/apt/sources.list
echo "deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee -a /etc/apt/sources.list
echo "APT::Acquire::Retries \"10\";" | sudo tee -a /etc/apt/apt.conf.d/80-retries # llvm.org unreliable
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
sudo apt-get update -y && sudo apt-get install -y clang-13
install-gflags:
steps:
- run:
name: Install gflags
command: |
sudo apt-get update -y && sudo apt-get install -y libgflags-dev
install-benchmark:
steps:
- run:
name: Install ninja build
command: sudo apt-get update -y && sudo apt-get install -y ninja-build
- run:
name: Install benchmark
command: |
git clone --depth 1 --branch v1.6.1 https://github.com/google/benchmark.git ~/benchmark
cd ~/benchmark && mkdir build && cd build
cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_GTEST_TESTS=0
ninja && sudo ninja install
install-valgrind:
steps:
- run:
name: Install valgrind
command: sudo apt-get update -y && sudo apt-get install -y valgrind
upgrade-cmake:
steps:
- run:
name: Upgrade cmake
command: |
sudo apt remove --purge cmake
sudo snap install cmake --classic
install-gflags-on-macos:
steps:
- run:
name: Install gflags on macos
command: |
HOMEBREW_NO_AUTO_UPDATE=1 brew install gflags
install-gtest-parallel:
steps:
- run:
name: Install gtest-parallel
command: |
git clone --single-branch --branch master --depth 1 https://github.com/google/gtest-parallel.git ~/gtest-parallel
echo 'export PATH=$HOME/gtest-parallel:$PATH' >> $BASH_ENV
install-compression-libs:
steps:
- run:
name: Install compression libs
command: |
sudo apt-get update -y && sudo apt-get install -y libsnappy-dev zlib1g-dev libbz2-dev liblz4-dev libzstd-dev
install-libprotobuf-mutator:
steps:
- run:
name: Install libprotobuf-mutator libs
command: |
git clone --single-branch --branch master --depth 1 git@github.com:google/libprotobuf-mutator.git ~/libprotobuf-mutator
cd ~/libprotobuf-mutator && mkdir build && cd build
cmake .. -GNinja -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang++-13 -DCMAKE_BUILD_TYPE=Release -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON
ninja && sudo ninja install
- run:
name: Setup environment variables
command: |
echo "export PKG_CONFIG_PATH=/usr/local/OFF/:~/libprotobuf-mutator/build/external.protobuf/lib/pkgconfig/" >> $BASH_ENV
echo "export PROTOC_BIN=~/libprotobuf-mutator/build/external.protobuf/bin/protoc" >> $BASH_ENV
executors:
windows-2xlarge:
machine:
image: 'windows-server-2019-vs2019:stable'
image: 'windows-server-2019-vs2019:201908-06'
resource_class: windows.2xlarge
shell: bash.exe
jobs:
build-macos:
macos:
xcode: 12.5.1
resource_class: large
environment:
ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc cause env_test hang, disable it for now
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- pre-steps-macos
- run: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=32 -j32 all
- post-steps
build-macos-cmake:
macos:
xcode: 12.5.1
resource_class: large
parameters:
run_even_tests:
description: run even or odd tests, used to split tests to 2 groups
type: boolean
default: true
steps:
- increase-max-open-files-on-macos
- install-cmake-on-macos
- install-gflags-on-macos
- pre-steps-macos
- run:
name: "cmake generate project file"
command: ulimit -S -n `ulimit -H -n` && mkdir build && cd build && cmake -DWITH_GFLAGS=1 ..
- run:
name: "Build tests"
command: cd build && make V=1 -j32
- when:
condition: << parameters.run_even_tests >>
steps:
- run:
name: "Run even tests"
command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 0,,2
- when:
condition:
not: << parameters.run_even_tests >>
steps:
- run:
name: "Run odd tests"
command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 1,,2
- post-steps
build-linux:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:201903-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: make V=1 J=32 -j32 check
- post-steps
build-linux-encrypted_env-no_compression:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=32 -j32 check
- run: |
./sst_dump --help | egrep -q 'Supported compression types: kNoCompression$' # Verify no compiled in compression
- post-steps
build-linux-shared_lib-alt_namespace-status_checked:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j32 check
- post-steps
- checkout # check out the code in the project directory
- run: pyenv global 3.5.2
- run: sudo apt-get update -y
- run: sudo apt-get install -y libgflags-dev
- run: SKIP_FORMAT_BUCK_CHECKS=1 PRINT_PARALLEL_OUTPUTS=1 make J=32 all check -j32
build-linux-release:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:201903-01
resource_class: 2xlarge
steps:
- checkout # check out the code in the project directory
- run: make V=1 -j32 release
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- install-gflags
- run: make V=1 -j32 release
- run: ./db_stress --version # ensure with gflags
- post-steps
build-linux-release-rtti:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- checkout # check out the code in the project directory
- run: make clean
- run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- run: sudo apt-get update -y && sudo apt-get install -y libgflags-dev
- run: make clean
- run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench
- run: ./db_stress --version # ensure with gflags
- run: make release -j32
build-linux-lite:
machine:
image: ubuntu-2004:202111-02
resource_class: large
image: ubuntu-1604:201903-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: LITE=1 make V=1 J=8 -j8 check
- post-steps
- checkout # check out the code in the project directory
- run: pyenv global 3.5.2
- run: SKIP_FORMAT_BUCK_CHECKS=1 PRINT_PARALLEL_OUTPUTS=1 LITE=1 make J=32 all check -j32
build-linux-lite-release:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:201903-01
resource_class: large
steps:
- checkout # check out the code in the project directory
- run: LITE=1 make V=1 -j8 release
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- install-gflags
- run: LITE=1 make V=1 -j8 release
- run: ./db_stress --version # ensure with gflags
- post-steps
- run: make release -j32
build-linux-clang-no_test_run:
build-linux-clang-no-test:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
image: ubuntu-1604:201903-01
resource_class: 2xlarge
steps:
- checkout # check out the code in the project directory
- run: sudo apt-get update -y && sudo apt-get install -y clang libgflags-dev libtbb-dev
- run: CC=clang CXX=clang++ USE_CLANG=1 PORTABLE=1 make V=1 -j16 all
- post-steps
- run: USE_CLANG=1 make all -j32
build-linux-clang10-asan:
build-linux-cmake:
machine:
image: ubuntu-2004:202111-02
image: ubuntu-1604:201903-01
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-clang-10
- run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-clang10-mini-tsan:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
# find test list by `make list_all_tests`
parameters:
start_test:
default: ""
type: string
end_test:
default: ""
type: string
steps:
- pre-steps
- install-gflags
- install-clang-10
- install-gtest-parallel
- run:
name: "Build unit tests"
command: |
echo "env: $(env)"
ROCKSDBTESTS_START=<<parameters.start_test>> ROCKSDBTESTS_END=<<parameters.end_test>> ROCKSDBTESTS_SUBSET_TESTS_TO_FILE=/tmp/test_list COMPILE_WITH_TSAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 --output-sync=target build_subset_tests
- run:
name: "Run unit tests in parallel"
command: |
sed -i 's/[[:space:]]*$//; s/ / \.\//g; s/.*/.\/&/' /tmp/test_list
cat /tmp/test_list
gtest-parallel $(</tmp/test_list) --output_dir=/tmp | cat # pipe to cat to continuously output status on circleci UI. Otherwise, no status will be printed while the job is running.
- post-steps
build-linux-clang10-ubsan:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-clang-10
- run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-valgrind:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-valgrind
- run: PORTABLE=1 make V=1 -j32 valgrind_test
- post-steps
build-linux-clang10-clang-analyze:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-clang-10
- run: sudo apt-get update -y && sudo apt-get install -y clang-tools-10
- run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze # aligned new doesn't work for a reason we haven't figured out. For an unknown reason, passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path.
- post-steps
build-linux-cmake-with-folly:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- upgrade-cmake
- run: make checkout_folly
- run: (mkdir build && cd build && cmake -DUSE_FOLLY=1 -DWITH_GFLAGS=1 .. && make V=1 -j20 && ctest -j20)
- post-steps
build-linux-cmake-with-benchmark:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-benchmark
- run: (mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. && make V=1 -j20 && ctest -j20)
- post-steps
build-linux-unity-and-headers:
docker: # executor type
- image: gcc:latest
environment:
EXTRA_CXXFLAGS: -mno-avx512f # Warnings-as-error in avx512fintrin.h, would be used on newer hardware
resource_class: large
steps:
- checkout # check out the code in the project directory
- run: apt-get update -y && apt-get install -y libgflags-dev
- run: make V=1 -j8 unity_test
- run: make V=1 -j8 -k check-headers # could be moved to a different build
- post-steps
build-linux-gcc-7-with-folly:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-7 g++-7 libgflags-dev
- run: make checkout_folly
- run: USE_FOLLY=1 CC=gcc-7 CXX=g++-7 V=1 make -j32 check
- post-steps
build-linux-gcc-8-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-8 g++-8 libgflags-dev
- run: CC=gcc-8 CXX=g++-8 V=1 make -j16 all
- post-steps
build-linux-gcc-10-cxx20-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- pre-steps
- run: sudo apt-get update -y && sudo apt-get install gcc-10 g++-10 libgflags-dev
- run: CC=gcc-10 CXX=g++-10 V=1 ROCKSDB_CXX_STANDARD=c++20 make -j16 all
- post-steps
build-linux-gcc-11-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-11 g++-11 libgflags-dev
- install-benchmark
- run: CC=gcc-11 CXX=g++-11 V=1 make -j16 all microbench
- post-steps
build-linux-clang-13-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- pre-steps
- install-clang-13
- install-benchmark
- run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j16 all microbench
- post-steps
# Ensure ASAN+UBSAN with folly, and full testsuite with clang 13
build-linux-clang-13-asan-ubsan-with-folly:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-clang-13
- install-gflags
- run: make checkout_folly
- run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j32 check
- post-steps
# This job is only to make sure the microbench tests are able to run, the benchmark result is not meaningful as the CI host is changing.
build-linux-run-microbench:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-benchmark
- run: DEBUG_LEVEL=0 make -j32 run_microbench
- post-steps
build-linux-mini-crashtest:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-gflags
- install-compression-libs
- run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS=--duration=960 blackbox_crash_test_with_atomic_flush
- post-steps
- run: mkdir build && cd build && cmake -DWITH_GFLAGS=0 .. && make -j32
build-windows:
executor: windows-2xlarge
parameters:
extra_cmake_opt:
default: ""
type: string
vs_year:
default: "2019"
type: string
cmake_generator:
default: "Visual Studio 16 2019"
type: string
environment:
THIRDPARTY_HOME: C:/Users/circleci/thirdparty
CMAKE_HOME: C:/Users/circleci/thirdparty/cmake-3.16.4-win64-x64
CMAKE_BIN: C:/Users/circleci/thirdparty/cmake-3.16.4-win64-x64/bin/cmake.exe
CMAKE_GENERATOR: Visual Studio 16 2019
SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.7
SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.7;C:/Users/circleci/thirdparty/snappy-1.1.7/build
SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.7/build/Debug/snappy.lib
VS_YEAR: <<parameters.vs_year>>
CMAKE_GENERATOR: <<parameters.cmake_generator>>
steps:
- checkout
- run:
name: "Setup VS"
command: |
if [[ "${VS_YEAR}" == "2019" ]]; then
echo "VS2019 already present."
elif [[ "${VS_YEAR}" == "2017" ]]; then
echo "Installing VS2017..."
powershell .circleci/vs2017_install.ps1
elif [[ "${VS_YEAR}" == "2015" ]]; then
echo "Installing VS2015..."
powershell .circleci/vs2015_install.ps1
fi
- store_artifacts:
path: \Users\circleci\AppData\Local\Temp\vslogs.zip
- run:
name: "Install thirdparty dependencies"
command: |
@@ -567,345 +98,34 @@ jobs:
command: |
mkdir build
cd build
${CMAKE_BIN} -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DJNI=1 << parameters.extra_cmake_opt >> ..
${CMAKE_BIN} -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DJNI=1 ..
cd ..
echo "Building with VS version: ${CMAKE_GENERATOR}"
msbuild.exe build/rocksdb.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64
- run:
name: "Test RocksDB"
shell: powershell.exe
command: |
build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16
build-linux-java:
machine:
image: ubuntu-2004:202111-02
resource_class: large
environment:
JAVA_HOME: /usr/lib/jvm/java-1.8.0-openjdk-amd64
steps:
- pre-steps
- install-gflags
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Test RocksDBJava"
command: make V=1 J=8 -j8 jtest
- post-steps
build-linux-java-static:
machine:
image: ubuntu-2004:202111-02
resource_class: large
environment:
JAVA_HOME: /usr/lib/jvm/java-1.8.0-openjdk-amd64
steps:
- pre-steps
- install-gflags
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava Static Library"
command: make V=1 J=8 -j8 rocksdbjavastatic
- post-steps
build-macos-java:
macos:
xcode: 12.5.1
resource_class: large
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc causes java 8 crash
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Test RocksDBJava"
command: make V=1 J=16 -j16 jtest
- post-steps
build-macos-java-static:
macos:
xcode: 12.5.1
resource_class: large
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- install-cmake-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava x86 and ARM Static Libraries"
command: make V=1 J=16 -j16 rocksdbjavastaticosx
- post-steps
build-macos-java-static-universal:
macos:
xcode: 12.5.1
resource_class: large
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- install-cmake-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava Universal Binary Static Library"
command: make V=1 J=16 -j16 rocksdbjavastaticosx_ub
- post-steps
build-examples:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-gflags
- run:
name: "Build examples"
command: |
OPT=-DTRAVIS V=1 make -j4 static_lib && cd examples && make -j4
- post-steps
build-cmake-mingw:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-gflags
- run: sudo apt-get update -y && sudo apt-get install -y mingw-w64
- run: sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix
- run:
name: "Build cmake-mingw"
command: |
sudo apt-get install snapd && sudo snap install cmake --beta --classic
export PATH=/snap/bin:$PATH
sudo apt-get install -y openjdk-8-jdk
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export PATH=$JAVA_HOME/bin:$PATH
echo "JAVA_HOME=${JAVA_HOME}"
which java && java -version
which javac && javac -version
mkdir build && cd build && cmake -DJNI=1 -DWITH_GFLAGS=OFF .. -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++ -DCMAKE_SYSTEM_NAME=Windows && make -j4 rocksdb rocksdbjni
- post-steps
build-linux-non-shm:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
environment:
TEST_TMPDIR: /tmp/rocksdb_test_tmp
steps:
- pre-steps
- install-gflags
- run: make V=1 -j32 check
- post-steps
build-linux-arm-test-full:
machine:
image: ubuntu-2004:202111-02
resource_class: arm.large
steps:
- pre-steps
- install-gflags
- run: make V=1 J=4 -j4 check
- post-steps
build-linux-arm:
machine:
image: ubuntu-2004:202111-02
resource_class: arm.large
steps:
- pre-steps
- install-gflags
- run: ROCKSDBTESTS_PLATFORM_DEPENDENT=only make V=1 J=4 -j4 all_but_some_tests check_some
- post-steps
build-linux-arm-cmake-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: arm.large
environment:
JAVA_HOME: /usr/lib/jvm/java-8-openjdk-arm64
steps:
- pre-steps
- install-gflags
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build with cmake"
command: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=1 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 ..
make -j4
- run:
name: "Build Java with cmake"
command: |
rm -rf build
mkdir build
cd build
cmake -DJNI=1 -DCMAKE_BUILD_TYPE=Release -DWITH_GFLAGS=1 ..
make -j4 rocksdb rocksdbjni
- post-steps
build-format-compatible:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-compression-libs
- run:
name: "test"
command: |
export TEST_TMPDIR=/dev/shm/rocksdb
rm -rf /dev/shm/rocksdb
mkdir /dev/shm/rocksdb
tools/check_format_compatible.sh
- post-steps
build-fuzzers:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-clang-13
- run: sudo apt-get update -y && sudo apt-get install -y cmake ninja-build binutils liblzma-dev libz-dev pkg-config autoconf libtool
- install-libprotobuf-mutator
- run:
name: "Build rocksdb lib"
command: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j4 static_lib
- run:
name: "Build fuzzers"
command: cd fuzz && make sst_file_writer_fuzzer db_fuzzer db_map_fuzzer
- post-steps
build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test,db_test2,env_basic_test,env_test,db_merge_operand_test -Concurrency 16
workflows:
version: 2
jobs-linux-run-tests:
build-linux:
jobs:
- build-linux
- build-linux-cmake-with-folly
- build-linux-gcc-7-with-folly
- build-linux-cmake-with-benchmark
- build-linux-encrypted_env-no_compression
- build-linux-lite
jobs-linux-run-tests-san:
build-linux-lite:
jobs:
- build-linux-clang10-asan
- build-linux-clang10-ubsan
- build-linux-clang10-mini-tsan:
start_test: ""
end_test: "env_test"
- build-linux-clang10-mini-tsan:
start_test: "env_test"
end_test: ""
- build-linux-shared_lib-alt_namespace-status_checked
jobs-linux-no-test-run:
- build-linux-lite
build-linux-release:
jobs:
- build-linux-release
- build-linux-release-rtti
build-linux-lite-release:
jobs:
- build-linux-lite-release
- build-examples
- build-fuzzers
- build-linux-clang-no_test_run
- build-linux-clang-13-no_test_run
- build-linux-gcc-8-no_test_run
- build-linux-gcc-10-cxx20-no_test_run
- build-linux-gcc-11-no_test_run
- build-linux-arm-cmake-no_test_run
jobs-linux-other-checks:
build-linux-clang-no-test:
jobs:
- build-linux-clang10-clang-analyze
- build-linux-unity-and-headers
- build-linux-mini-crashtest
jobs-windows:
- build-linux-clang-no-test
build-linux-cmake:
jobs:
- build-windows:
name: "build-windows-vs2019"
- build-windows:
name: "build-windows-vs2019-cxx20"
extra_cmake_opt: -DCMAKE_CXX_STANDARD=20
- build-windows:
name: "build-windows-vs2017"
vs_year: "2017"
cmake_generator: "Visual Studio 15 Win64"
- build-cmake-mingw
jobs-java:
- build-linux-cmake
build-windows:
jobs:
- build-linux-java
- build-linux-java-static
- build-macos-java
- build-macos-java-static
- build-macos-java-static-universal
jobs-macos:
jobs:
- build-macos
- build-macos-cmake:
run_even_tests: true
- build-macos-cmake:
run_even_tests: false
jobs-linux-arm:
jobs:
- build-linux-arm
nightly:
triggers:
- schedule:
cron: "0 9 * * *"
filters:
branches:
only:
- main
jobs:
- build-format-compatible
- build-linux-arm-test-full
- build-linux-run-microbench
- build-linux-non-shm
- build-linux-clang-13-asan-ubsan-with-folly
- build-linux-valgrind
- build-windows

@@ -1,6 +0,0 @@
# Suppress UBSAN warnings related to stl_tree.h, e.g.
# UndefinedBehaviorSanitizer: undefined-behavior /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43 in
# /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43:
# runtime error: upcast of address 0x000001fa8820 with insufficient space for an object of type
# 'std::_Rb_tree_node<std::pair<const std::__cxx11::basic_string<char>, rocksdb::(anonymous namespace)::LockHoldingInfo> >'
src:*bits/stl_tree.h

@@ -1,24 +0,0 @@
$VS_DOWNLOAD_LINK = "https://go.microsoft.com/fwlink/?LinkId=691126"
$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe"
curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS 2015 installer failed"
exit 1
}
$VS_INSTALL_ARGS = @("/Quiet", "/NoRestart")
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
Remove-Item -Path vs_installer.exe -Force
$exitCode = $process.ExitCode
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
echo "VS 2015 installer exited with code $exitCode, which should be one of [0, 3010]."
curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS Collect tool failed."
exit 1
}
Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru
New-Item -Path "C:\w\build-results" -ItemType "directory" -Force
Copy-Item -Path "C:\Users\circleci\AppData\Local\Temp\vslogs.zip" -Destination "C:\w\build-results\"
exit 1
}
echo "VS 2015 installed."

@@ -1,35 +0,0 @@
$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe"
$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe"
$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
"--add Microsoft.VisualStudio.Component.VC.Tools.14.13",
"--add Microsoft.Component.MSBuild",
"--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
"--add Microsoft.VisualStudio.Component.TextTemplating",
"--add Microsoft.VisualStudio.Component.VC.CoreIde",
"--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
"--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
"--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
"--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81")
curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS 2017 installer failed"
exit 1
}
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
Remove-Item -Path vs_installer.exe -Force
$exitCode = $process.ExitCode
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]."
curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS Collect tool failed."
exit 1
}
Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru
New-Item -Path "C:\w\build-results" -ItemType "directory" -Force
Copy-Item -Path "C:\Users\circleci\AppData\Local\Temp\vslogs.zip" -Destination "C:\w\build-results\"
exit 1
}
echo "VS 2017 installed."

@@ -32,13 +32,10 @@ jobs:
- name: Download clang-format-diff.py
uses: wei/wget@v1
with:
args: https://raw.githubusercontent.com/llvm/llvm-project/release/12.x/clang/tools/clang-format/clang-format-diff.py
args: https://raw.githubusercontent.com/llvm-mirror/clang/master/tools/clang-format/clang-format-diff.py
- name: Check format
run: VERBOSE_CHECK=1 make check-format
- name: Compare buckify output
run: make check-buck-targets
- name: Simple source code checks
run: make check-sources

.gitignore (vendored): 9 changed lines

@@ -1,5 +1,4 @@
make_config.mk
rocksdb.pc
*.a
*.arc
@@ -8,7 +7,6 @@ rocksdb.pc
*.gcda
*.gcno
*.o
*.o.tmp
*.so
*.so.*
*_test
@@ -54,7 +52,6 @@ db_test2
trace_analyzer
trace_analyzer_test
block_cache_trace_analyzer
io_tracer_parser
.DS_Store
.vs
.vscode
@@ -90,9 +87,3 @@ buckifier/__pycache__
compile_commands.json
clang-format-diff.py
.py3/
fuzz/proto/gen/
fuzz/crash-*
cmake-build-*
third-party/folly/

@@ -2,19 +2,20 @@ dist: xenial
language: cpp
os:
- linux
- osx
arch:
- amd64
- arm64
- ppc64le
- s390x
compiler:
- clang
- gcc
osx_image: xcode9.4
cache:
- ccache
addons:
apt:
update: true
sources:
- ubuntu-toolchain-r-test
packages:
@@ -25,6 +26,15 @@ addons:
- liblzma-dev # xv
- libzstd-dev
- zlib1g-dev
homebrew:
update: true
packages:
- ccache
- gflags
- lz4
- snappy
- xz
- zstd
env:
- TEST_GROUP=platform_dependent # 16-18 minutes
@@ -43,180 +53,212 @@ env:
- JOB_NAME=cmake-gcc9 # 3-5 minutes
- JOB_NAME=cmake-gcc9-c++20 # 3-5 minutes
- JOB_NAME=cmake-mingw # 3 minutes
- JOB_NAME=make-gcc4.8
- JOB_NAME=status_checked
matrix:
exclude:
- os: osx
env: JOB_NAME=cmake-gcc8
- os: osx
env: JOB_NAME=cmake-gcc9
- os: osx
env: JOB_NAME=cmake-gcc9-c++20
- os: osx
env: JOB_NAME=cmake-mingw
- os: osx
env: JOB_NAME=make-gcc4.8
- os: osx
arch: ppc64le
- os: osx
compiler: gcc
- os : linux
arch: arm64
env: JOB_NAME=cmake-mingw
- os : linux
arch: arm64
env: JOB_NAME=make-gcc4.8
- os: linux
arch: ppc64le
env: JOB_NAME=cmake-mingw
- os: linux
arch: s390x
env: JOB_NAME=cmake-mingw
arch: ppc64le
env: JOB_NAME=make-gcc4.8
- os: linux
compiler: clang
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: arm64
env: TEST_GROUP=platform_dependent
- if: type = pull_request AND commit_message !~ /FULL_CI/
# Exclude all but most unique cmake variants for pull requests, but build all in branches
- if: type = pull_request
os : linux
arch: arm64
env: TEST_GROUP=1
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: TEST_GROUP=1
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=1
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: TEST_GROUP=2
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: TEST_GROUP=2
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=2
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: TEST_GROUP=3
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: TEST_GROUP=3
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=3
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: TEST_GROUP=4
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: TEST_GROUP=4
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=4
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
arch: amd64
env: JOB_NAME=cmake
- if: type = pull_request AND commit_message !~ /FULL_CI/ AND commit_message !~ /java/
- if: type = pull_request
os : linux
arch: amd64
env: JOB_NAME=cmake-gcc8
- if: type = pull_request
os : linux
arch: amd64
env: JOB_NAME=cmake-gcc9
# Exclude most osx, arm64 and ppc64le tests for pull requests, but build in branches
# Temporarily disable ppc64le unit tests in PRs until Travis gets its act together (#6653)
- if: type = pull_request
os: linux
arch: ppc64le
env: TEST_GROUP=platform_dependent
# NB: the cmake build is a partial java test
- if: type = pull_request
os: osx
env: TEST_GROUP=1
- if: type = pull_request
os : linux
arch: arm64
env: TEST_GROUP=1
- if: type = pull_request
os: linux
arch: ppc64le
env: TEST_GROUP=1
- if: type = pull_request
os: osx
env: TEST_GROUP=2
- if: type = pull_request
os : linux
arch: arm64
env: TEST_GROUP=2
- if: type = pull_request
os: linux
arch: ppc64le
env: TEST_GROUP=2
- if: type = pull_request
os: osx
env: TEST_GROUP=3
- if: type = pull_request
os : linux
arch: arm64
env: TEST_GROUP=3
- if: type = pull_request
os: linux
arch: ppc64le
env: TEST_GROUP=3
- if: type = pull_request
os: osx
env: TEST_GROUP=4
- if: type = pull_request
os : linux
arch: arm64
env: TEST_GROUP=4
- if: type = pull_request
os: linux
arch: ppc64le
env: TEST_GROUP=4
- if: type = pull_request AND commit_message !~ /java/
os : osx
env: JOB_NAME=java_test
- if: type = pull_request AND commit_message !~ /java/
os : linux
arch: arm64
env: JOB_NAME=java_test
- if: type = pull_request AND commit_message !~ /FULL_CI/ AND commit_message !~ /java/
- if: type = pull_request AND commit_message !~ /java/
os: linux
arch: ppc64le
env: JOB_NAME=java_test
- if: type = pull_request AND commit_message !~ /FULL_CI/ AND commit_message !~ /java/
os: linux
arch: s390x
env: JOB_NAME=java_test
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os : osx
env: JOB_NAME=lite_build
- if: type = pull_request
os : linux
arch: arm64
env: JOB_NAME=lite_build
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os: linux
arch: ppc64le
env: JOB_NAME=lite_build
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=lite_build
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os : osx
env: JOB_NAME=examples
- if: type = pull_request
os : linux
arch: arm64
env: JOB_NAME=examples
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os: linux
arch: ppc64le
env: JOB_NAME=examples
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=examples
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os : linux
arch: arm64
env: JOB_NAME=cmake-gcc8
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os: linux
arch: ppc64le
env: JOB_NAME=cmake-gcc8
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=cmake-gcc8
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os : linux
arch: arm64
env: JOB_NAME=cmake-gcc9
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os: linux
arch: ppc64le
env: JOB_NAME=cmake-gcc9
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=cmake-gcc9
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os : linux
arch: arm64
env: JOB_NAME=cmake-gcc9-c++20
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os: linux
arch: ppc64le
env: JOB_NAME=cmake-gcc9-c++20
- if: type = pull_request AND commit_message !~ /FULL_CI/
- if: type = pull_request
os : osx
env: JOB_NAME=status_checked
- if: type = pull_request
os : linux
arch: arm64
env: JOB_NAME=status_checked
- if: type = pull_request
os: linux
arch: s390x
env: JOB_NAME=cmake-gcc9-c++20
arch: ppc64le
env: JOB_NAME=status_checked
install:
- CC=gcc-7 && CXX=g++-7
- if [ "${TRAVIS_OS_NAME}" == osx ]; then
PATH=$PATH:/usr/local/opt/ccache/libexec;
fi
- if [ "${JOB_NAME}" == cmake-gcc8 ]; then
sudo apt-get install -y g++-8 || exit $?;
sudo apt-get install -y g++-8;
CC=gcc-8 && CXX=g++-8;
fi
- if [ "${JOB_NAME}" == cmake-gcc9 ] || [ "${JOB_NAME}" == cmake-gcc9-c++20 ]; then
sudo apt-get install -y g++-9 || exit $?;
sudo apt-get install -y g++-9;
CC=gcc-9 && CXX=g++-9;
fi
- if [ "${JOB_NAME}" == cmake-mingw ]; then
sudo apt-get install -y mingw-w64 || exit $?;
sudo apt-get install -y mingw-w64 ;
fi
- if [ "${CXX}" == "g++-7" ]; then
sudo apt-get install -y g++-7 || exit $?;
- if [ "${JOB_NAME}" == make-gcc4.8 ]; then
sudo apt-get install -y g++-4.8 ;
CC=gcc-4.8 && CXX=g++-4.8;
fi
- |
if [[ "${JOB_NAME}" == cmake* ]]; then
sudo apt-get remove -y cmake cmake-data
export CMAKE_DEB="cmake-3.14.5-Linux-$(uname -m).deb"
export CMAKE_DEB_URL="https://rocksdb-deps.s3-us-west-2.amazonaws.com/cmake/${CMAKE_DEB}"
curl --silent --fail --show-error --location --output "${CMAKE_DEB}" "${CMAKE_DEB_URL}" || exit $?
sudo dpkg -i "${CMAKE_DEB}" || exit $?
which cmake && cmake --version
- if [[ "${JOB_NAME}" == cmake* ]] && [ "${TRAVIS_OS_NAME}" == linux ]; then
CMAKE_DIST_URL="https://rocksdb-deps.s3-us-west-2.amazonaws.com/cmake/cmake-3.14.5-Linux-$(uname -m).tar.bz2";
TAR_OPT="--strip-components=1 -xj";
if [ "aarch64" == "$(uname -m)" ]; then
sudo apt-get install -y libuv1 librhash0;
sudo apt-get upgrade -y libstdc++6;
fi;
mkdir cmake-dist && curl --silent --fail --show-error --location "${CMAKE_DIST_URL}" | tar -C cmake-dist ${TAR_OPT} && export PATH=$PWD/cmake-dist/bin:$PATH;
fi
- |
if [[ "${JOB_NAME}" == java_test || "${JOB_NAME}" == cmake* ]]; then
# Ensure JDK 8
sudo apt-get install -y openjdk-8-jdk || exit $?
export PATH=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)/bin:$PATH
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)
if [ "${TRAVIS_OS_NAME}" == osx ]; then
brew tap AdoptOpenJDK/openjdk
brew cask install adoptopenjdk8
export JAVA_HOME=$(/usr/libexec/java_home)
else
sudo apt-get install -y openjdk-8-jdk
export PATH=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)/bin:$PATH
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)
fi
echo "JAVA_HOME=${JAVA_HOME}"
which java && java -version
which javac && javac -version
@@ -230,36 +272,32 @@ before_script:
script:
- date; ${CXX} --version
- if [ `command -v ccache` ]; then ccache -C; fi
- export MK_PARALLEL=4;
if [[ "$TRAVIS_CPU_ARCH" == s390x ]]; then
export MK_PARALLEL=1;
fi
- case $TEST_GROUP in
platform_dependent)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=only make -j$MK_PARALLEL all_but_some_tests check_some
OPT=-DTRAVIS V=1 ROCKSDBTESTS_END=db_block_cache_test make -j4 all_but_some_tests check_some
;;
1)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_END=backup_engine_test make -j$MK_PARALLEL check_some
OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=db_block_cache_test ROCKSDBTESTS_END=db_iter_test make -j4 check_some
;;
2)
OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" LIB_MODE=shared V=1 make -j$MK_PARALLEL tools && OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=backup_engine_test ROCKSDBTESTS_END=db_universal_compaction_test make -j$MK_PARALLEL check_some
OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" V=1 make -j4 tools && OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" V=1 ROCKSDBTESTS_START=db_iter_test ROCKSDBTESTS_END=options_file_test make -j4 check_some
;;
3)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=db_universal_compaction_test ROCKSDBTESTS_END=table_properties_collector_test make -j$MK_PARALLEL check_some
OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=options_file_test ROCKSDBTESTS_END=write_prepared_transaction_test make -j4 check_some
;;
4)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=table_properties_collector_test make -j$MK_PARALLEL check_some
OPT=-DTRAVIS V=1 ROCKSDBTESTS_START=write_prepared_transaction_test make -j4 check_some
;;
esac
- case $JOB_NAME in
java_test)
OPT=-DTRAVIS LIB_MODE=shared V=1 make rocksdbjava jtest
OPT=-DTRAVIS V=1 make rocksdbjava jtest
;;
lite_build)
OPT='-DTRAVIS -DROCKSDB_LITE' LIB_MODE=shared V=1 make -j$MK_PARALLEL all
OPT='-DTRAVIS -DROCKSDB_LITE' V=1 make -j4 all
;;
examples)
OPT=-DTRAVIS LIB_MODE=shared V=1 make -j$MK_PARALLEL static_lib && cd examples && make -j$MK_PARALLEL
OPT=-DTRAVIS V=1 make -j4 static_lib && cd examples && make -j4
;;
cmake-mingw)
sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix;
@@ -272,7 +310,13 @@ script:
;;
esac
mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=0 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 .. && make -j$MK_PARALLEL && cd .. && rm -rf build && mkdir build && cd build && cmake -DJNI=1 .. -DCMAKE_BUILD_TYPE=Release $OPT && make -j$MK_PARALLEL rocksdb rocksdbjni
mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=0 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 .. && make -j4 && cd .. && rm -rf build && mkdir build && cd build && cmake -DJNI=1 .. -DCMAKE_BUILD_TYPE=Release $OPT && make -j4 rocksdb rocksdbjni
;;
make-gcc4.8)
OPT=-DTRAVIS V=1 SKIP_LINK=1 make -j4 all && [ "Linking broken because libgflags compiled with newer ABI" ]
;;
status_checked)
OPT=-DTRAVIS V=1 ASSERT_STATUS_CHECKED=1 make -j4 check_some
;;
esac
notifications:

File diff suppressed because it is too large.

@@ -1,4 +1,4 @@
# RocksDB default options change log (NO LONGER MAINTAINED)
# RocksDB default options change log
## Unreleased
* delayed_write_rate takes the rate given by rate_limiter if not specified.

File diff suppressed because it is too large.

@@ -6,7 +6,7 @@ than release mode.
RocksDB's library should be able to compile without any dependency installed,
although we recommend installing some compression libraries (see below).
We do depend on newer gcc/clang with C++17 support (GCC >= 7, Clang >= 5).
We do depend on newer gcc/clang with C++11 support.
There are few options when compiling RocksDB:
@@ -43,16 +43,12 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
command line flags processing. You can compile rocksdb library even
if you don't have gflags installed.
* `make check` will also check code formatting, which requires [clang-format](https://clang.llvm.org/docs/ClangFormat.html)
* If you wish to build the RocksJava static target, then cmake is required for building Snappy.
* If you wish to run microbench (e.g, `make microbench`, `make ribbon_bench` or `cmake -DWITH_BENCHMARK=1`), Google benchmark >= 1.6.0 is needed.
## Supported platforms
* **Linux - Ubuntu**
* Upgrade your gcc to version at least 7 to get C++17 support.
* Upgrade your gcc to version at least 4.8 to get C++11 support.
* Install gflags. First, try: `sudo apt-get install libgflags-dev`
If this doesn't work and you're using Ubuntu, here's a nice tutorial:
(http://askubuntu.com/questions/312173/installing-gflags-12-04)
@@ -64,7 +60,8 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
* Install zstandard: `sudo apt-get install libzstd-dev`.
* **Linux - CentOS / RHEL**
* Upgrade your gcc to version at least 7 to get C++17 support
* Upgrade your gcc to version at least 4.8 to get C++11 support:
`yum install gcc48-c++`
* Install gflags:
git clone https://github.com/gflags/gflags.git
@@ -97,28 +94,19 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
sudo yum install libasan
* Install zstandard:
* With [EPEL](https://fedoraproject.org/wiki/EPEL):
sudo yum install libzstd-devel
* With CentOS 8:
sudo dnf install libzstd-devel
* From source:
wget https://github.com/facebook/zstd/archive/v1.1.3.tar.gz
mv v1.1.3.tar.gz zstd-1.1.3.tar.gz
tar zxvf zstd-1.1.3.tar.gz
cd zstd-1.1.3
make && sudo make install
wget https://github.com/facebook/zstd/archive/v1.1.3.tar.gz
mv v1.1.3.tar.gz zstd-1.1.3.tar.gz
tar zxvf zstd-1.1.3.tar.gz
cd zstd-1.1.3
make && sudo make install
* **OS X**:
* Install latest C++ compiler that supports C++ 17:
* Install latest C++ compiler that supports C++ 11:
* Update XCode: run `xcode-select --install` (or install it from XCode App's settings).
* Install via [homebrew](http://brew.sh/).
* If you're a first-time developer on MacOS, you still need to run: `xcode-select --install` in your command line.
* run `brew tap homebrew/versions; brew install gcc7 --use-llvm` to install gcc 7 (or higher).
* run `brew tap homebrew/versions; brew install gcc48 --use-llvm` to install gcc 4.8 (or higher).
* run `brew install rocksdb`
* **FreeBSD** (11.01):
@@ -161,7 +149,7 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
* Install the dependencies for RocksDB:
pkg_add gmake gflags snappy bzip2 lz4 zstd git jdk bash findutils gnuwatch
pkg_add gmake gflags snappy bzip2 lz4 zstd git jdk bash findutils gnuwatch
* Build RocksDB from source:
@@ -180,15 +168,16 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
* **iOS**:
* Run: `TARGET_OS=IOS make static_lib`. When building the project which uses rocksdb iOS library, make sure to define two important pre-processing macros: `ROCKSDB_LITE` and `IOS_CROSS_COMPILE`.
* **Windows** (Visual Studio 2017 and up):
* **Windows**:
* For building with MS Visual Studio 13 you will need Update 4 installed.
* Read and follow the instructions at CMakeLists.txt
* Or install via [vcpkg](https://github.com/microsoft/vcpkg)
* Or install via [vcpkg](https://github.com/microsoft/vcpkg)
* run `vcpkg install rocksdb:x64-windows`
* **AIX 6.1**
* Install AIX Toolbox rpms with gcc
* Use these environment variables:
export PORTABLE=1
export CC=gcc
export AR="ar -X64"
@@ -199,9 +188,9 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
export LIBPATH=/opt/freeware/lib
export JAVA_HOME=/usr/java8_64
export PATH=/opt/freeware/bin:$PATH
* **Solaris Sparc**
* Install GCC 7 and higher.
* Install GCC 4.8.2 and higher.
* Use these environment variables:
export CC=gcc
@@ -210,3 +199,4 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
export EXTRA_LDFLAGS=-m64
export PORTABLE=1
export PLATFORM_LDFLAGS="-static-libstdc++ -static-libgcc"

@@ -1,6 +1,6 @@
This is the list of all known third-party language bindings for RocksDB. If something is missing, please open a pull request to add it.
* Java - https://github.com/facebook/rocksdb/tree/main/java
* Java - https://github.com/facebook/rocksdb/tree/master/java
* Python
* http://python-rocksdb.readthedocs.io/en/latest/
* http://pyrocksdb.readthedocs.org/en/latest/ (unmaintained)
@@ -10,9 +10,7 @@ This is the list of all known third-party language bindings for RocksDB. If some
* Ruby - http://rubygems.org/gems/rocksdb-ruby
* Haskell - https://hackage.haskell.org/package/rocksdb-haskell
* PHP - https://github.com/Photonios/rocksdb-php
* C#
* https://github.com/warrenfalk/rocksdb-sharp
* https://github.com/curiosity-ai/rocksdb-sharp
* C# - https://github.com/warrenfalk/rocksdb-sharp
* Rust
* https://github.com/pingcap/rust-rocksdb (used in production fork of https://github.com/spacejam/rust-rocksdb)
* https://github.com/spacejam/rust-rocksdb

Makefile: 2325 changed lines

File diff suppressed because it is too large.

@@ -1,7 +0,0 @@
This is the list of all known third-party plugins for RocksDB. If something is missing, please open a pull request to add it.
* [Dedupfs](https://github.com/ajkr/dedupfs): an example for plugin developers to reference
* [HDFS](https://github.com/riversand963/rocksdb-hdfs-env): an Env used for interacting with HDFS. Migrated from main RocksDB repo
* [ZenFS](https://github.com/westerndigitalcorporation/zenfs): a file system for zoned block devices
* [RADOS](https://github.com/riversand963/rocksdb-rados-env): an Env used for interacting with RADOS. Migrated from RocksDB main repo.
* [PMEM](https://github.com/pmem/pmem-rocksdb-plugin): a collection of plugins to enable Persistent Memory on RocksDB.

@@ -1,9 +1,8 @@
## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage
[![CircleCI Status](https://circleci.com/gh/facebook/rocksdb.svg?style=svg)](https://circleci.com/gh/facebook/rocksdb)
[![TravisCI Status](https://api.travis-ci.com/facebook/rocksdb.svg?branch=main)](https://travis-ci.com/github/facebook/rocksdb)
[![Appveyor Build status](https://ci.appveyor.com/api/projects/status/fbgfu0so3afcno78/branch/main?svg=true)](https://ci.appveyor.com/project/Facebook/rocksdb/branch/main)
[![PPC64le Build Status](http://140-211-168-68-openstack.osuosl.org:8080/buildStatus/icon?job=rocksdb&style=plastic)](http://140-211-168-68-openstack.osuosl.org:8080/job/rocksdb)
[![Linux/Mac Build Status](https://travis-ci.org/facebook/rocksdb.svg?branch=master)](https://travis-ci.org/facebook/rocksdb)
[![Windows Build status](https://ci.appveyor.com/api/projects/status/fbgfu0so3afcno78/branch/master?svg=true)](https://ci.appveyor.com/project/Facebook/rocksdb/branch/master)
[![PPC64le Build Status](http://140.211.168.68:8080/buildStatus/icon?job=Rocksdb)](http://140.211.168.68:8080/job/Rocksdb)
RocksDB is developed and maintained by Facebook Database Engineering Team.
It is built on earlier work on [LevelDB](https://github.com/google/leveldb) by Sanjay Ghemawat (sanjay@google.com)
@@ -17,7 +16,7 @@ and Space-Amplification-Factor (SAF). It has multi-threaded compactions,
making it especially suitable for storing multiple terabytes of data in a
single database.
Start with example usage here: https://github.com/facebook/rocksdb/tree/main/examples
Start with example usage here: https://github.com/facebook/rocksdb/tree/master/examples
See the [github wiki](https://github.com/facebook/rocksdb/wiki) for more explanation.
@@ -25,7 +24,7 @@ The public interface is in `include/`. Callers should not include or
rely on the details of any other header files in this package. Those
internal APIs may be changed without warning.
Questions and discussions are welcome on the [RocksDB Developers Public](https://www.facebook.com/groups/rocksdb.dev/) Facebook group and [email list](https://groups.google.com/g/rocksdb) on Google Groups.
Design discussions are conducted in https://www.facebook.com/groups/rocksdb.dev/ and https://rocksdb.slack.com/
## License

@@ -4,7 +4,7 @@ RocksDBLite is a project focused on mobile use cases, which don't need a lot of
Some examples of the features disabled by ROCKSDB_LITE:
* compiled-in support for LDB tool
* No backup engine
* No backupable DB
* No support for replication (which we provide in form of TransactionalIterator)
* No advanced monitoring tools
* No special-purpose memtables that are highly optimized for specific use cases
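To make the compile-time gating concrete, here is a minimal, self-contained sketch of the `#ifndef ROCKSDB_LITE` guard pattern such exclusions rely on. The macro name is real; the guarded feature below is a hypothetical stand-in, not code from the RocksDB sources:

```cpp
// lite_gating.cc -- illustrative sketch of ROCKSDB_LITE compile-time gating.
// Full build:  g++ lite_gating.cc && ./a.out
// LITE build:  g++ -DROCKSDB_LITE lite_gating.cc && ./a.out
#include <cstdio>

#ifndef ROCKSDB_LITE
// Compiled only into full builds (think backup engine, LDB tool, monitoring).
static void AdvancedMonitoring() { std::printf("full-build feature\n"); }
#endif  // !ROCKSDB_LITE

int main() {
#ifndef ROCKSDB_LITE
  AdvancedMonitoring();
#else
  std::printf("LITE build: feature compiled out\n");
#endif
  return 0;
}
```

Because guarded code never reaches the compiler in a LITE build, the resulting binary stays small, which is the point of the project.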

TARGETS: 6939 changed lines

File diff suppressed because it is too large.

@@ -26,9 +26,6 @@ Learn more about those use cases in a Tech Talk by Ankit Gupta and Naveen Somasu
## Yahoo
Yahoo is using RocksDB as a storage engine for their biggest distributed data store Sherpa. Learn more about it here: http://yahooeng.tumblr.com/post/120730204806/sherpa-scales-new-heights
## Baidu
[Apache Doris](http://doris.apache.org/master/en/) is an MPP analytical database engine released by Baidu. It [uses RocksDB](http://doris.apache.org/master/en/administrator-guide/operation/tablet-meta-tool.html) to manage its tablet's metadata.
## CockroachDB
CockroachDB is an open-source geo-replicated transactional database. They are using RocksDB as their storage engine. Check out their github: https://github.com/cockroachdb/cockroach
@@ -47,7 +44,7 @@ Tango is using RocksDB as a graph storage to store all users' connection data an
Turn is using RocksDB as a storage layer for their key/value store, serving at peak 2.4MM QPS out of different datacenters.
Check out our RocksDB Protobuf merge operator at: https://github.com/vladb38/rocksdb_protobuf
## Santander UK/Cloudera Profession Services
## Santanader UK/Cloudera Profession Services
Check out their blog post: http://blog.cloudera.com/blog/2015/08/inside-santanders-near-real-time-data-ingest-architecture/
## Airbnb
@@ -70,7 +67,7 @@ Pinterest's Object Retrieval System uses RocksDB for storage: https://www.youtub
[VWO's](https://vwo.com/) Smart Code checker and URL helper uses RocksDB to store all the URLs where VWO's Smart Code is installed.
## quasardb
[quasardb](https://www.quasardb.net) is a high-performance, distributed, transactional key-value database that integrates well with in-memory analytics engines such as Apache Spark.
[quasardb](https://www.quasardb.net) is a high-performance, distributed, transactional key-value database that integrates well with in-memory analytics engines such as Apache Spark.
quasardb uses a heavily tuned RocksDB as its persistence layer.
## Netflix
@@ -89,7 +86,7 @@ quasardb uses a heavily tuned RocksDB as its persistence layer.
[Uber](http://eng.uber.com/cherami/) uses RocksDB as a durable and scalable task queue.
## 360 Pika
[360](http://www.360.cn/) [Pika](https://github.com/Qihoo360/pika) is a nosql compatible with redis. With the huge amount of data stored, redis may suffer for a capacity bottleneck, and pika was born for solving it. It has widely been used in many companies.
[360](http://www.360.cn/) [Pika](https://github.com/Qihoo360/pika) is a nosql compatible with redis. With the huge amount of data stored, redis may suffer for a capacity bottleneck, and pika was born for solving it. It has widely been widely used in many company
## LzLabs
LzLabs is using RocksDB as a storage engine in their multi-database distributed framework to store application configuration and user data.
@@ -99,30 +96,16 @@ LzLabs is using RocksDB as a storage engine in their multi-database distributed
## IOTA Foundation
[IOTA Foundation](https://www.iota.org/) is using RocksDB in the [IOTA Reference Implementation (IRI)](https://github.com/iotaledger/iri) to store the local state of the Tangle. The Tangle is the first open-source distributed ledger powering the future of the Internet of Things.
## Avrio Project
[Avrio Project](http://avrio-project.github.io/avrio.network/) is using RocksDB in [Avrio](https://github.com/avrio-project/avrio) to store blocks, account balances, and other blockchain-related data. Avrio is a multiblockchain decentralized cryptocurrency empowering monetary transactions.
## Crux
[Crux](https://github.com/juxt/crux) is a document database that uses RocksDB for local [EAV](https://en.wikipedia.org/wiki/Entity%E2%80%93attribute%E2%80%93value_model) index storage to enable point-in-time bitemporal Datalog queries. The "unbundled" architecture uses Kafka to provide horizontal scalability.
## Nebula Graph
[Nebula Graph](https://github.com/vesoft-inc/nebula) is a distributed, scalable, lightning-fast, open source graph database capable of hosting super large scale graphs with dozens of billions of vertices (nodes) and trillions of edges, with milliseconds of latency.
## YugabyteDB
[YugabyteDB](https://www.yugabyte.com/) is an open source, high performance, distributed SQL database that uses RocksDB as its storage layer. For more information, please see https://github.com/yugabyte/yugabyte-db/.
## ArangoDB
[ArangoDB](https://www.arangodb.com/) is a native multi-model database with flexible data models for documents, graphs, and key-values, for building high performance applications using a convenient SQL-like query language or JavaScript extensions. It uses RocksDB as its storage engine.
## Milvus
[Milvus](https://milvus.io/) is an open source vector database for unstructured data. It uses RocksDB not only as one of the supported kv storage engines, but also as a message queue.
## Kafka
[Kafka](https://kafka.apache.org/) is an open-source distributed event streaming platform; it uses RocksDB to store state in Kafka Streams: https://www.confluent.io/blog/how-to-tune-rocksdb-kafka-streams-state-stores-performance/.
## Solana Labs
[Solana](https://github.com/solana-labs/solana) is a fast, secure, scalable, and decentralized blockchain. It uses RocksDB as the underlying storage for its ledger store.
## Others
More databases using RocksDB can be found at [dbdb.io](https://dbdb.io/browse?embeds=rocksdb).


@ -24,7 +24,7 @@ We strive to achieve the following goals:
* make all unit tests pass both in debug and release builds.
* Note: the recent introduction of SyncPoint seems to disable running db_test in Release builds.
* make performance on par with published benchmarks accounting for HW differences
* we would like to keep the port code in line with the main branch with no forking
## Build system
We have chosen CMake as a widely accepted build system to build the Windows port. It is very fast and convenient.
@ -66,7 +66,7 @@ We endeavored to make it functionally on par with posix_env. This means we repli
Even though Windows provides its own efficient thread-pool implementation, we chose to replicate the posix logic using `std::thread` primitives. This allows anyone to quickly detect changes within the posix source code and replicate them within the Windows env. This has proven to work very well. At the same time, anyone who wishes to replace the built-in thread-pool can do so using RocksDB stackable environments.
For disk access we implemented all of the functionality present within the posix_env, which includes memory-mapped files, random access, rate-limiter support, etc.
The `use_os_buffer` flag on Posix platforms currently denotes disabling read-ahead via the `fadvise` mechanism. Windows has no `fadvise` system call, and it implements the disk cache in a way that differs greatly from Linux. It is not an uncommon practice on Windows to perform un-buffered disk access to gain control of memory consumption. We think that in our use case this may also be a good configuration option, at the expense of disk throughput; to compensate, one may increase the configured in-memory cache size instead. Thus we have chosen `use_os_buffer=false` to disable OS disk buffering for `WinWritableFile` and `WinRandomAccessFile`. The OS imposes restrictions on the alignment of the disk offsets, the buffers used, and the amount of data that is read/written when accessing files in un-buffered mode. When the option is true, the classes behave in a standard way. This allows writes and reads to be performed in cases where un-buffered access does not make sense, such as the WAL and MANIFEST.
We have replaced `pread/pwrite` with `WriteFile/ReadFile` with an `OVERLAPPED` structure so we can atomically seek to the position of the disk operation but still perform the operation synchronously. Thus we are able to emulate the functionality of `pread/pwrite` reasonably well. The only difference is that the file pointer is not returned to its original position, but that hardly matters given the random nature of access.
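As a rough illustration of that emulation, here is a minimal Python `ctypes` sketch of a `pread`-style positioned read on Windows. It is a sketch only: it assumes a Windows Python runtime, the `OVERLAPPED` layout is flattened, and the helper name `pread_like` is ours, not part of the port.

```python
import ctypes
from ctypes import wintypes

class OVERLAPPED(ctypes.Structure):
    # Flattened layout of the Win32 OVERLAPPED structure; only the
    # Offset/OffsetHigh fields matter for a synchronous positioned read.
    _fields_ = [("Internal", ctypes.c_void_p),
                ("InternalHigh", ctypes.c_void_p),
                ("Offset", wintypes.DWORD),
                ("OffsetHigh", wintypes.DWORD),
                ("hEvent", wintypes.HANDLE)]

kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)

def pread_like(handle, nbytes, offset):
    """Read nbytes at offset by passing the offset inside OVERLAPPED to a
    blocking ReadFile; as the text above notes, the file pointer is left
    where the read ended rather than restored."""
    ov = OVERLAPPED()
    ov.Offset = offset & 0xFFFFFFFF
    ov.OffsetHigh = (offset >> 32) & 0xFFFFFFFF
    buf = ctypes.create_string_buffer(nbytes)
    got = wintypes.DWORD(0)
    ok = kernel32.ReadFile(handle, buf, nbytes,
                           ctypes.byref(got), ctypes.byref(ov))
    if not ok:
        raise ctypes.WinError(ctypes.get_last_error())
    return buf.raw[:got.value]
```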

appveyor.yml Normal file

@ -0,0 +1,86 @@
version: 1.0.{build}
image: Visual Studio 2019
environment:
JAVA_HOME: C:\Program Files\Java\jdk1.8.0
THIRDPARTY_HOME: $(APPVEYOR_BUILD_FOLDER)\thirdparty
SNAPPY_HOME: $(THIRDPARTY_HOME)\snappy-1.1.7
SNAPPY_INCLUDE: $(SNAPPY_HOME);$(SNAPPY_HOME)\build
SNAPPY_LIB_DEBUG: $(SNAPPY_HOME)\build\Debug\snappy.lib
SNAPPY_LIB_RELEASE: $(SNAPPY_HOME)\build\Release\snappy.lib
LZ4_HOME: $(THIRDPARTY_HOME)\lz4-1.8.3
LZ4_INCLUDE: $(LZ4_HOME)\lib
LZ4_LIB_DEBUG: $(LZ4_HOME)\visual\VS2010\bin\x64_Debug\liblz4_static.lib
LZ4_LIB_RELEASE: $(LZ4_HOME)\visual\VS2010\bin\x64_Release\liblz4_static.lib
ZSTD_HOME: $(THIRDPARTY_HOME)\zstd-1.4.0
ZSTD_INCLUDE: $(ZSTD_HOME)\lib;$(ZSTD_HOME)\lib\dictBuilder
ZSTD_LIB_DEBUG: $(ZSTD_HOME)\build\VS2010\bin\x64_Debug\libzstd_static.lib
ZSTD_LIB_RELEASE: $(ZSTD_HOME)\build\VS2010\bin\x64_Release\libzstd_static.lib
matrix:
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
CMAKE_GENERATOR: Visual Studio 14 Win64
DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio 14.0\Common7\IDE\devenv.com
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
CMAKE_GENERATOR: Visual Studio 15 Win64
DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\IDE\devenv.com
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
CMAKE_GENERATOR: Visual Studio 16
CMAKE_PLATEFORM_NAME: x64
DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\devenv.com
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019
CMAKE_GENERATOR: Visual Studio 16
CMAKE_PLATEFORM_NAME: x64
CMAKE_OPT: -DCMAKE_CXX_STANDARD=20
DEV_ENV: C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\IDE\devenv.com
install:
- md %THIRDPARTY_HOME%
- echo "Building Snappy dependency..."
- cd %THIRDPARTY_HOME%
- curl --fail --silent --show-error --output snappy-1.1.7.zip --location https://github.com/google/snappy/archive/1.1.7.zip
- unzip snappy-1.1.7.zip
- cd snappy-1.1.7
- mkdir build
- cd build
- if DEFINED CMAKE_PLATEFORM_NAME (set "PLATEFORM_OPT=-A %CMAKE_PLATEFORM_NAME%")
- cmake .. -G "%CMAKE_GENERATOR%" %PLATEFORM_OPT%
- msbuild Snappy.sln /p:Configuration=Debug /p:Platform=x64
- msbuild Snappy.sln /p:Configuration=Release /p:Platform=x64
- echo "Building LZ4 dependency..."
- cd %THIRDPARTY_HOME%
- curl --fail --silent --show-error --output lz4-1.8.3.zip --location https://github.com/lz4/lz4/archive/v1.8.3.zip
- unzip lz4-1.8.3.zip
- cd lz4-1.8.3\visual\VS2010
- ps: $CMD="$Env:DEV_ENV"; & $CMD lz4.sln /upgrade
- msbuild lz4.sln /p:Configuration=Debug /p:Platform=x64
- msbuild lz4.sln /p:Configuration=Release /p:Platform=x64
- echo "Building ZStd dependency..."
- cd %THIRDPARTY_HOME%
- curl --fail --silent --show-error --output zstd-1.4.0.zip --location https://github.com/facebook/zstd/archive/v1.4.0.zip
- unzip zstd-1.4.0.zip
- cd zstd-1.4.0\build\VS2010
- ps: $CMD="$Env:DEV_ENV"; & $CMD zstd.sln /upgrade
- msbuild zstd.sln /p:Configuration=Debug /p:Platform=x64
- msbuild zstd.sln /p:Configuration=Release /p:Platform=x64
before_build:
- md %APPVEYOR_BUILD_FOLDER%\build
- cd %APPVEYOR_BUILD_FOLDER%\build
- if DEFINED CMAKE_PLATEFORM_NAME (set "PLATEFORM_OPT=-A %CMAKE_PLATEFORM_NAME%")
- cmake .. -G "%CMAKE_GENERATOR%" %PLATEFORM_OPT% %CMAKE_OPT% -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DLZ4=1 -DZSTD=1 -DXPRESS=1 -DJNI=1
- cd ..
build:
project: build\rocksdb.sln
parallel: true
verbosity: normal
test:
test_script:
- ps: build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_with_timestamp_basic_test,db_test2,db_test,env_basic_test,env_test,db_merge_operand_test -Concurrency 8
on_failure:
- cmd: 7z a build-failed.zip %APPVEYOR_BUILD_FOLDER%\build\ && appveyor PushArtifact build-failed.zip

File diff suppressed because it is too large

File diff suppressed because it is too large

buckifier/buckify_rocksdb.py Executable file → Normal file

@ -1,4 +1,3 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
@ -25,10 +24,10 @@ from util import ColorString
# (This generates a TARGET file without user-specified dependency for unit
# tests.)
# $python3 buckifier/buckify_rocksdb.py \
# '{"fake": {
# "extra_deps": [":test_dep", "//fakes/module:mock1"],
# "extra_compiler_flags": ["-DROCKSDB_LITE", "-Os"]
# }
# '{"fake": { \
# "extra_deps": [":test_dep", "//fakes/module:mock1"], \
# "extra_compiler_flags": ["-DROCKSDB_LITE", "-Os"], \
# } \
# }'
# (Generated TARGETS file has test_dep and mock1 as dependencies for RocksDB
# unit tests, and will use the extra_compiler_flags to compile the unit test
@ -49,8 +48,8 @@ def parse_src_mk(repo_path):
if '=' in line:
current_src = line.split('=')[0].strip()
src_files[current_src] = []
elif '.c' in line:
src_path = line.split('\\')[0].strip()
elif '.cc' in line:
src_path = line.split('.cc')[0].strip() + '.cc'
src_files[current_src].append(src_path)
return src_files
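The `.cc` split above is what strips the Makefile line-continuation noise from each source entry; a tiny illustration of the new parsing rule (the input line is hypothetical):

```python
# Hypothetical src.mk continuation line: splitting on ".cc" drops the
# trailing backslash/whitespace, then the extension is re-appended.
line = "db/db_impl.cc \\"
src_path = line.split('.cc')[0].strip() + '.cc'
assert src_path == "db/db_impl.cc"
```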
@ -70,28 +69,44 @@ def get_cc_files(repo_path):
return cc_files
# Get non_parallel tests from Makefile
def get_non_parallel_tests(repo_path):
# Get tests from Makefile
def get_tests(repo_path):
Makefile = repo_path + "/Makefile"
s = set({})
# Dictionary TEST_NAME => IS_PARALLEL
tests = {}
found_non_parallel_tests = False
found_tests = False
for line in open(Makefile):
line = line.strip()
if line.startswith("NON_PARALLEL_TEST ="):
found_non_parallel_tests = True
elif found_non_parallel_tests:
if line.startswith("TESTS ="):
found_tests = True
elif found_tests:
if line.endswith("\\"):
# remove the trailing \
line = line[:-1]
line = line.strip()
s.add(line)
tests[line] = False
else:
# we consumed all the non_parallel tests
# we consumed all the tests
break
return s
found_parallel_tests = False
for line in open(Makefile):
line = line.strip()
if line.startswith("PARALLEL_TEST ="):
found_parallel_tests = True
elif found_parallel_tests:
if line.endswith("\\"):
# remove the trailing \
line = line[:-1]
line = line.strip()
tests[line] = True
else:
# we consumed all the parallel tests
break
return tests
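To make those two Makefile scans concrete, here is a small self-contained rendition of the same logic over a hypothetical Makefile fragment; it condenses the two passes into one loop for illustration, while the real function reads the repository Makefile twice:

```python
makefile = """\
TESTS = \\
  db_test \\
  env_test \\

PARALLEL_TEST = \\
  db_test \\
"""

tests, found = {}, None
for line in makefile.splitlines():
    line = line.strip()
    if line.startswith("TESTS ="):
        found = False          # entries collected here start as serial
    elif line.startswith("PARALLEL_TEST ="):
        found = True           # entries listed here are flipped to parallel
    elif found is not None:
        if line.endswith("\\"):
            tests[line[:-1].strip()] = found
        else:
            found = None       # first non-continuation line ends the list

assert tests == {"db_test": True, "env_test": False}
```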
# Parse extra dependencies passed by user from command line
def get_dependencies():
@ -124,39 +139,19 @@ def generate_targets(repo_path, deps_map):
src_mk = parse_src_mk(repo_path)
# get all .cc files
cc_files = get_cc_files(repo_path)
# get non_parallel tests from Makefile
non_parallel_tests = get_non_parallel_tests(repo_path)
# get tests from Makefile
tests = get_tests(repo_path)
if src_mk is None or cc_files is None or non_parallel_tests is None:
if src_mk is None or cc_files is None or tests is None:
return False
extra_argv = ""
if len(sys.argv) >= 2:
# Heuristically quote and canonicalize whitespace for inclusion
# in how the file was generated.
extra_argv = " '{0}'".format(" ".join(sys.argv[1].split()))
TARGETS = TARGETSBuilder("%s/TARGETS" % repo_path, extra_argv)
TARGETS = TARGETSBuilder("%s/TARGETS" % repo_path)
# rocksdb_lib
TARGETS.add_library(
"rocksdb_lib",
src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
deps=["//folly/container:f14_hash"])
# rocksdb_whole_archive_lib
TARGETS.add_library(
"rocksdb_whole_archive_lib",
src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
deps=["//folly/container:f14_hash"],
headers=None,
extra_external_deps="",
link_whole=True)
src_mk["TOOL_LIB_SOURCES"])
# rocksdb_test_lib
TARGETS.add_library(
"rocksdb_test_lib",
@ -165,8 +160,9 @@ def generate_targets(repo_path, deps_map):
src_mk.get("EXP_LIB_SOURCES", []) +
src_mk.get("ANALYZER_LIB_SOURCES", []),
[":rocksdb_lib"],
extra_test_libs=True
)
extra_external_deps=""" + [
("googletest", None, "gtest"),
]""")
# rocksdb_tools_lib
TARGETS.add_library(
"rocksdb_tools_lib",
@ -174,108 +170,41 @@ def generate_targets(repo_path, deps_map):
src_mk.get("ANALYZER_LIB_SOURCES", []) +
["test_util/testutil.cc"],
[":rocksdb_lib"])
# rocksdb_cache_bench_tools_lib
TARGETS.add_library(
"rocksdb_cache_bench_tools_lib",
src_mk.get("CACHE_BENCH_LIB_SOURCES", []),
[":rocksdb_lib"])
# rocksdb_stress_lib
TARGETS.add_rocksdb_library(
"rocksdb_stress_lib",
src_mk.get("ANALYZER_LIB_SOURCES", [])
+ src_mk.get('STRESS_LIB_SOURCES', [])
+ ["test_util/testutil.cc"])
# db_stress binary
TARGETS.add_binary("db_stress",
["db_stress_tool/db_stress.cc"],
[":rocksdb_stress_lib"])
# bench binaries
for src in src_mk.get("MICROBENCH_SOURCES", []):
name = src.rsplit('/',1)[1].split('.')[0] if '/' in src else src.split('.')[0]
TARGETS.add_binary(
name,
[src],
[],
extra_bench_libs=True
)
print("Extra dependencies:\n{0}".format(json.dumps(deps_map)))
# Dictionary test executable name -> relative source file path
test_source_map = {}
# c_test.c is added through TARGETS.add_c_test(). If there
# are more than one .c test file, we need to extend
# TARGETS.add_c_test() to include other C tests too.
for test_src in src_mk.get("TEST_MAIN_SOURCES_C", []):
if test_src != 'db/c_test.c':
print("Don't know how to deal with " + test_src)
return False
TARGETS.add_c_test()
try:
with open(f"{repo_path}/buckifier/bench.json") as json_file:
fast_fancy_bench_config_list = json.load(json_file)
for config_dict in fast_fancy_bench_config_list:
clean_benchmarks = {}
benchmarks = config_dict['benchmarks']
for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items():
clean_benchmarks[binary][benchmark] = []
for metric in overloaded_metric_list:
if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric)
TARGETS.add_fancy_bench_config(config_dict['name'], clean_benchmarks, False, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold'])
with open(f"{repo_path}/buckifier/bench-slow.json") as json_file:
slow_fancy_bench_config_list = json.load(json_file)
for config_dict in slow_fancy_bench_config_list:
clean_benchmarks = {}
benchmarks = config_dict['benchmarks']
for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items():
clean_benchmarks[binary][benchmark] = []
for metric in overloaded_metric_list:
if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric)
for config_dict in slow_fancy_bench_config_list:
TARGETS.add_fancy_bench_config(config_dict['name']+"_slow", clean_benchmarks, True, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold'])
# it is better that servicelab experiments break
# than rocksdb github ci
except Exception:
pass
TARGETS.add_test_header()
for test_src in src_mk.get("TEST_MAIN_SOURCES", []):
test = test_src.split('.c')[0].strip().split('/')[-1].strip()
test_source_map[test] = test_src
print("" + test + " " + test_src)
# test for every test we found in the Makefile
for target_alias, deps in deps_map.items():
for test, test_src in sorted(test_source_map.items()):
if len(test) == 0:
print(ColorString.warning("Failed to get test name for %s" % test_src))
for test in sorted(tests):
match_src = [src for src in cc_files if ("/%s.c" % test) in src]
if len(match_src) == 0:
print(ColorString.warning("Cannot find .cc file for %s" % test))
continue
elif len(match_src) > 1:
print(ColorString.warning("Found more than one .cc for %s" % test))
print(match_src)
continue
assert(len(match_src) == 1)
is_parallel = tests[test]
test_target_name = \
test if not target_alias else test + "_" + target_alias
TARGETS.register_test(
test_target_name,
match_src[0],
is_parallel,
json.dumps(deps['extra_deps']),
json.dumps(deps['extra_compiler_flags']))
if test in _EXPORTED_TEST_LIBS:
test_library = "%s_lib" % test_target_name
TARGETS.add_library(test_library, [test_src], deps=[":rocksdb_test_lib"], extra_test_libs=True)
TARGETS.register_test(
test_target_name,
test_src,
deps = json.dumps(deps['extra_deps'] + [':'+test_library]),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags']))
else:
TARGETS.register_test(
test_target_name,
test_src,
deps = json.dumps(deps['extra_deps'] + [":rocksdb_test_lib"] ),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags']))
TARGETS.add_library(test_library, match_src, [":rocksdb_test_lib"])
TARGETS.flush_tests()
print(ColorString.info("Generated TARGETS Summary:"))
print(ColorString.info("- %d libs" % TARGETS.total_lib))


@ -10,7 +10,6 @@ except ImportError:
from __builtin__ import object
from __builtin__ import str
import targets_cfg
import pprint
def pretty_list(lst, indent=8):
if lst is None or len(lst) == 0:
@ -26,12 +25,11 @@ def pretty_list(lst, indent=8):
class TARGETSBuilder(object):
def __init__(self, path, extra_argv):
def __init__(self, path):
self.path = path
self.targets_file = open(path, 'wb')
header = targets_cfg.rocksdb_target_header_template.format(
extra_argv=extra_argv)
self.targets_file.write(header.encode("utf-8"))
self.targets_file = open(path, 'w')
header = targets_cfg.rocksdb_target_header_template
self.targets_file.write(header)
self.total_lib = 0
self.total_bin = 0
self.total_test = 0
@ -41,73 +39,61 @@ class TARGETSBuilder(object):
self.targets_file.close()
def add_library(self, name, srcs, deps=None, headers=None,
extra_external_deps="", link_whole=False,
external_dependencies=None, extra_test_libs=False):
if headers is not None:
extra_external_deps=""):
headers_attr_prefix = ""
if headers is None:
headers_attr_prefix = "auto_"
headers = "AutoHeaders.RECURSIVE_GLOB"
else:
headers = "[" + pretty_list(headers) + "]"
self.targets_file.write(targets_cfg.library_template.format(
name=name,
srcs=pretty_list(srcs),
headers_attr_prefix=headers_attr_prefix,
headers=headers,
deps=pretty_list(deps),
extra_external_deps=extra_external_deps,
link_whole=link_whole,
external_dependencies=pretty_list(external_dependencies),
extra_test_libs=extra_test_libs
).encode("utf-8"))
extra_external_deps=extra_external_deps))
self.total_lib = self.total_lib + 1
def add_rocksdb_library(self, name, srcs, headers=None,
external_dependencies=None):
if headers is not None:
def add_rocksdb_library(self, name, srcs, headers=None):
headers_attr_prefix = ""
if headers is None:
headers_attr_prefix = "auto_"
headers = "AutoHeaders.RECURSIVE_GLOB"
else:
headers = "[" + pretty_list(headers) + "]"
self.targets_file.write(targets_cfg.rocksdb_library_template.format(
name=name,
srcs=pretty_list(srcs),
headers=headers,
external_dependencies=pretty_list(external_dependencies)
).encode("utf-8")
)
headers_attr_prefix=headers_attr_prefix,
headers=headers))
self.total_lib = self.total_lib + 1
def add_binary(self, name, srcs, deps=None, extra_preprocessor_flags=None,extra_bench_libs=False):
self.targets_file.write(targets_cfg.binary_template.format(
name=name,
srcs=pretty_list(srcs),
deps=pretty_list(deps),
extra_preprocessor_flags=pretty_list(extra_preprocessor_flags),
extra_bench_libs=extra_bench_libs,
).encode("utf-8"))
def add_binary(self, name, srcs, deps=None):
self.targets_file.write(targets_cfg.binary_template % (
name,
pretty_list(srcs),
pretty_list(deps)))
self.total_bin = self.total_bin + 1
def add_c_test(self):
self.targets_file.write(b"""
add_c_test_wrapper()
""")
def add_test_header(self):
self.targets_file.write(b"""
# Generate a test rule for each entry in ROCKS_TESTS
# Do not build the tests in opt mode, since SyncPoint and other test code
# will not be included.
""")
def add_fancy_bench_config(self, name, bench_config, slow, expected_runtime, sl_iterations, regression_threshold):
self.targets_file.write(targets_cfg.fancy_bench_template.format(
name=name,
bench_config=pprint.pformat(bench_config),
slow=slow,
expected_runtime=expected_runtime,
sl_iterations=sl_iterations,
regression_threshold=regression_threshold
).encode("utf-8"))
def register_test(self,
test_name,
src,
deps,
is_parallel,
extra_deps,
extra_compiler_flags):
exec_mode = "serial"
if is_parallel:
exec_mode = "parallel"
self.tests_cfg += targets_cfg.test_cfg_template % (
test_name,
str(src),
str(exec_mode),
extra_deps,
extra_compiler_flags)
self.targets_file.write(targets_cfg.unittests_template.format(test_name=test_name,test_cc=str(src),deps=deps,
extra_compiler_flags=extra_compiler_flags).encode("utf-8"))
self.total_test = self.total_test + 1
def flush_tests(self):
self.targets_file.write(targets_cfg.unittests_template % self.tests_cfg)
self.tests_cfg = ""
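Taken together, a generator run drives this class roughly as follows. This is an illustrative sketch against the older API shown in this hunk (the one that takes `is_parallel` and buffers tests until `flush_tests`); the argument values are made up, and it assumes it is run from the repository root with `buckifier` importable:

```python
from buckifier.targets_builder import TARGETSBuilder

builder = TARGETSBuilder("/tmp/TARGETS")
builder.add_library("rocksdb_lib", ["db/db_impl.cc"])
builder.register_test("db_test", "db/db_test.cc",
                      is_parallel=True,
                      extra_deps="[]",
                      extra_compiler_flags="[]")
builder.flush_tests()  # emits the accumulated ROCKS_TESTS table
```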


@ -5,42 +5,196 @@ from __future__ import print_function
from __future__ import unicode_literals
rocksdb_target_header_template = \
"""# This file \100generated by:
#$ python3 buckifier/buckify_rocksdb.py{extra_argv}
"""# This file \100generated by `python3 buckifier/buckify_rocksdb.py`
# --> DO NOT EDIT MANUALLY <--
# This file is a Facebook-specific integration for buck builds, so can
# only be validated by Facebook employees.
#
# @noautodeps @nocodemods
load("//rocks/buckifier:defs.bzl", "cpp_library_wrapper","rocks_cpp_library_wrapper","cpp_binary_wrapper","cpp_unittest_wrapper","fancy_bench_wrapper","add_c_test_wrapper")
load("@fbcode_macros//build_defs:auto_headers.bzl", "AutoHeaders")
load("@fbcode_macros//build_defs:cpp_library.bzl", "cpp_library")
load(":defs.bzl", "test_binary")
REPO_PATH = package_name() + "/"
ROCKSDB_COMPILER_FLAGS = [
"-fno-builtin-memcmp",
# Needed to compile in fbcode
"-Wno-expansion-to-defined",
# Added missing flags from output of build_detect_platform
"-Wnarrowing",
"-DROCKSDB_NO_DYNAMIC_EXTENSION",
]
ROCKSDB_EXTERNAL_DEPS = [
("bzip2", None, "bz2"),
("snappy", None, "snappy"),
("zlib", None, "z"),
("gflags", None, "gflags"),
("lz4", None, "lz4"),
("zstd", None),
("tbb", None),
]
ROCKSDB_OS_DEPS = [
(
"linux",
["third-party//numa:numa", "third-party//liburing:uring"],
),
]
ROCKSDB_OS_PREPROCESSOR_FLAGS = [
(
"linux",
[
"-DOS_LINUX",
"-DROCKSDB_FALLOCATE_PRESENT",
"-DROCKSDB_MALLOC_USABLE_SIZE",
"-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX",
"-DROCKSDB_RANGESYNC_PRESENT",
"-DROCKSDB_SCHED_GETCPU_PRESENT",
"-DROCKSDB_IOURING_PRESENT",
"-DHAVE_SSE42",
"-DLIBURING",
"-DNUMA",
],
),
(
"macos",
["-DOS_MACOSX"],
),
]
ROCKSDB_PREPROCESSOR_FLAGS = [
"-DROCKSDB_PLATFORM_POSIX",
"-DROCKSDB_LIB_IO_POSIX",
"-DROCKSDB_SUPPORT_THREAD_LOCAL",
# Flags to enable libs we include
"-DSNAPPY",
"-DZLIB",
"-DBZIP2",
"-DLZ4",
"-DZSTD",
"-DZSTD_STATIC_LINKING_ONLY",
"-DGFLAGS=gflags",
"-DTBB",
# Added missing flags from output of build_detect_platform
"-DROCKSDB_BACKTRACE",
# Directories with files for #include
"-I" + REPO_PATH + "include/",
"-I" + REPO_PATH,
]
ROCKSDB_ARCH_PREPROCESSOR_FLAGS = {
"x86_64": [
"-DHAVE_PCLMUL",
],
}
build_mode = read_config("fbcode", "build_mode")
is_opt_mode = build_mode.startswith("opt")
# -DNDEBUG is added by default in opt mode in fbcode. But adding it twice
# doesn't harm and avoid forgetting to add it.
ROCKSDB_COMPILER_FLAGS += (["-DNDEBUG"] if is_opt_mode else [])
sanitizer = read_config("fbcode", "sanitizer")
# Do not enable jemalloc if sanitizer presents. RocksDB will further detect
# whether the binary is linked with jemalloc at runtime.
ROCKSDB_OS_PREPROCESSOR_FLAGS += ([(
"linux",
["-DROCKSDB_JEMALLOC"],
)] if sanitizer == "" else [])
ROCKSDB_OS_DEPS += ([(
"linux",
["third-party//jemalloc:headers"],
)] if sanitizer == "" else [])
ROCKSDB_LIB_DEPS = [
":rocksdb_lib",
":rocksdb_test_lib",
] if not is_opt_mode else [":rocksdb_lib"]
"""
library_template = """
cpp_library_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], headers={headers}, link_whole={link_whole}, extra_test_libs={extra_test_libs})
cpp_library(
name = "{name}",
srcs = [{srcs}],
{headers_attr_prefix}headers = {headers},
arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
compiler_flags = ROCKSDB_COMPILER_FLAGS,
os_deps = ROCKSDB_OS_DEPS,
os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS,
preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
deps = [{deps}],
external_deps = ROCKSDB_EXTERNAL_DEPS{extra_external_deps},
)
"""
rocksdb_library_template = """
rocks_cpp_library_wrapper(name="{name}", srcs=[{srcs}], headers={headers})
cpp_library(
name = "{name}",
srcs = [{srcs}],
{headers_attr_prefix}headers = {headers},
arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
compiler_flags = ROCKSDB_COMPILER_FLAGS,
os_deps = ROCKSDB_OS_DEPS,
os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS,
preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
deps = ROCKSDB_LIB_DEPS,
external_deps = ROCKSDB_EXTERNAL_DEPS,
)
"""
binary_template = """
cpp_binary_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], extra_preprocessor_flags=[{extra_preprocessor_flags}], extra_bench_libs={extra_bench_libs})
cpp_binary(
name = "%s",
srcs = [%s],
arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
compiler_flags = ROCKSDB_COMPILER_FLAGS,
preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
deps = [%s],
external_deps = ROCKSDB_EXTERNAL_DEPS,
)
"""
test_cfg_template = """ [
"%s",
"%s",
"%s",
%s,
%s,
],
"""
unittests_template = """
cpp_unittest_wrapper(name="{test_name}",
srcs=["{test_cc}"],
deps={deps},
extra_compiler_flags={extra_compiler_flags})
"""
fancy_bench_template = """
fancy_bench_wrapper(suite_name="{name}", binary_to_bench_to_metric_list_map={bench_config}, slow={slow}, expected_runtime={expected_runtime}, sl_iterations={sl_iterations}, regression_threshold={regression_threshold})
# [test_name, test_src, test_type, extra_deps, extra_compiler_flags]
ROCKS_TESTS = [
%s]
# Generate a test rule for each entry in ROCKS_TESTS
# Do not build the tests in opt mode, since SyncPoint and other test code
# will not be included.
[
cpp_unittest(
name = test_name,
srcs = [test_cc],
arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
os_preprocessor_flags = ROCKSDB_OS_PREPROCESSOR_FLAGS,
compiler_flags = ROCKSDB_COMPILER_FLAGS,
preprocessor_flags = ROCKSDB_PREPROCESSOR_FLAGS,
deps = [":rocksdb_test_lib"] + extra_deps,
external_deps = ROCKSDB_EXTERNAL_DEPS + [
("googletest", None, "gtest"),
],
)
for test_name, test_cc, parallelism, extra_deps, extra_compiler_flags in ROCKS_TESTS
if not is_opt_mode
]
"""


@ -45,30 +45,23 @@ if test -z "$OUTPUT"; then
exit 1
fi
# we depend on C++17, but should be compatible with newer standards
if [ "$ROCKSDB_CXX_STANDARD" ]; then
PLATFORM_CXXFLAGS="-std=$ROCKSDB_CXX_STANDARD"
else
PLATFORM_CXXFLAGS="-std=c++17"
fi
# we depend on C++11
PLATFORM_CXXFLAGS="-std=c++11"
# we currently depend on POSIX platform
COMMON_FLAGS="-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX"
# Default to fbcode gcc on internal fb machines
if [ -z "$ROCKSDB_NO_FBCODE" -a -d /mnt/gvfs/third-party ]; then
FBCODE_BUILD="true"
# If we're compiling with TSAN or shared lib, we need pic build
# If we're compiling with TSAN we need pic build
PIC_BUILD=$COMPILE_WITH_TSAN
if [ "$LIB_MODE" == "shared" ]; then
PIC_BUILD=1
fi
if [ -n "$ROCKSDB_FBCODE_BUILD_WITH_PLATFORM010" ]; then
source "$PWD/build_tools/fbcode_config_platform010.sh"
elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_PLATFORM009" ]; then
source "$PWD/build_tools/fbcode_config_platform009.sh"
if [ -n "$ROCKSDB_FBCODE_BUILD_WITH_481" ]; then
# we need this to build with MySQL. Don't use for other purposes.
source "$PWD/build_tools/fbcode_config4.8.1.sh"
elif [ -n "$ROCKSDB_FBCODE_BUILD_WITH_5xx" ]; then
source "$PWD/build_tools/fbcode_config.sh"
else
source "$PWD/build_tools/fbcode_config_platform009.sh"
source "$PWD/build_tools/fbcode_config_platform007.sh"
fi
fi
@ -96,16 +89,6 @@ if test -z "$CXX"; then
fi
fi
if test -z "$AR"; then
if [ -x "$(command -v gcc-ar)" ]; then
AR=gcc-ar
elif [ -x "$(command -v llvm-ar)" ]; then
AR=llvm-ar
else
AR=ar
fi
fi
# Detect OS
if test -z "$TARGET_OS"; then
TARGET_OS=`uname -s`
@ -168,13 +151,10 @@ case "$TARGET_OS" in
else
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic"
fi
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt -ldl"
if test -z "$ROCKSDB_USE_IO_URING"; then
ROCKSDB_USE_IO_URING=1
fi
if test "$ROCKSDB_USE_IO_URING" -ne 0; then
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt"
if test $ROCKSDB_USE_IO_URING; then
# check for liburing
$CXX $PLATFORM_CXXFLAGS -x c++ - -luring -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -luring -o /dev/null 2>/dev/null <<EOF
#include <liburing.h>
int main() {
struct io_uring ring;
@ -187,6 +167,9 @@ EOF
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_IOURING_PRESENT"
fi
fi
if test -z "$USE_FOLLY_DISTRIBUTED_MUTEX"; then
USE_FOLLY_DISTRIBUTED_MUTEX=1
fi
# PORT_FILES=port/linux/linux_specific.cc
;;
SunOS)
@ -209,17 +192,6 @@ EOF
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread"
# PORT_FILES=port/freebsd/freebsd_specific.cc
;;
GNU/kFreeBSD)
PLATFORM=OS_GNU_KFREEBSD
COMMON_FLAGS="$COMMON_FLAGS -DOS_GNU_KFREEBSD"
if [ -z "$USE_CLANG" ]; then
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp"
else
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -latomic"
fi
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lpthread -lrt"
# PORT_FILES=port/gnu_kfreebsd/gnu_kfreebsd_specific.cc
;;
NetBSD)
PLATFORM=OS_NETBSD
COMMON_FLAGS="$COMMON_FLAGS -fno-builtin-memcmp -D_REENTRANT -DOS_NETBSD"
@ -269,7 +241,7 @@ esac
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS ${CXXFLAGS}"
JAVA_LDFLAGS="$PLATFORM_LDFLAGS"
JAVA_STATIC_LDFLAGS="$PLATFORM_LDFLAGS"
JAVAC_ARGS="-source 8"
JAVAC_ARGS="-source 7"
if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then
# Cross-compiling; do not try any compilation tests.
@ -282,7 +254,7 @@ if [ "$CROSS_COMPILE" = "true" -o "$FBCODE_BUILD" = "true" ]; then
else
if ! test $ROCKSDB_DISABLE_FALLOCATE; then
# Test whether fallocate is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <fcntl.h>
#include <linux/falloc.h>
int main() {
@ -298,7 +270,7 @@ EOF
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# Test whether Snappy library is installed
# http://code.google.com/p/snappy/
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <snappy.h>
int main() {}
EOF
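Every feature and library check in this script follows the same probe pattern: feed a minimal translation unit to the compiler on stdin (`-x c++ -`) and branch on the exit status. A rough Python equivalent of one such probe, for illustration (the compiler name and flags are placeholders, not values from the script):

```python
import subprocess

def compiles(cxx, flags, program):
    # Mirror the shell probe: pipe the program into `$CXX ... -x c++ -`
    # and report success purely from the compiler's exit status.
    cmd = [cxx, *flags, "-x", "c++", "-", "-o", "/dev/null"]
    proc = subprocess.run(cmd, input=program.encode(),
                          stdout=subprocess.DEVNULL,
                          stderr=subprocess.DEVNULL)
    return proc.returncode == 0

if compiles("c++", ["-std=c++17"], "#include <snappy.h>\nint main() {}\n"):
    print("-DSNAPPY")  # the script would append this to COMMON_FLAGS
```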
@ -313,38 +285,30 @@ EOF
# Test whether gflags library is installed
# http://gflags.github.io/gflags/
# check if the namespace is gflags
if $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF
#include <gflags/gflags.h>
using namespace GFLAGS_NAMESPACE;
int main() {}
EOF
then
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
# check if namespace is gflags
elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
#include <gflags/gflags.h>
using namespace gflags;
int main() {}
EOF
then
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1 -DGFLAGS_NAMESPACE=gflags"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
# check if namespace is google
elif $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null << EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
else
# check if namespace is google
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null << EOF
#include <gflags/gflags.h>
using namespace google;
int main() {}
EOF
then
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=1 -DGFLAGS_NAMESPACE=google"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DGFLAGS=google"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lgflags"
fi
fi
fi
if ! test $ROCKSDB_DISABLE_ZLIB; then
# Test whether zlib library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <zlib.h>
int main() {}
EOF
@ -357,7 +321,7 @@ EOF
if ! test $ROCKSDB_DISABLE_BZIP; then
# Test whether bzip library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <bzlib.h>
int main() {}
EOF
@ -370,7 +334,7 @@ EOF
if ! test $ROCKSDB_DISABLE_LZ4; then
# Test whether lz4 library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <lz4.h>
#include <lz4hc.h>
int main() {}
@ -384,7 +348,7 @@ EOF
if ! test $ROCKSDB_DISABLE_ZSTD; then
# Test whether zstd library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
$CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <zstd.h>
int main() {}
EOF
@ -397,7 +361,7 @@ EOF
if ! test $ROCKSDB_DISABLE_NUMA; then
# Test whether numa is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -lnuma 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -o /dev/null -lnuma 2>/dev/null <<EOF
#include <numa.h>
#include <numaif.h>
int main() {}
@ -411,7 +375,7 @@ EOF
if ! test $ROCKSDB_DISABLE_TBB; then
# Test whether tbb is available
$CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -ltbb 2>/dev/null <<EOF
$CXX $CFLAGS $LDFLAGS -x c++ - -o /dev/null -ltbb 2>/dev/null <<EOF
#include <tbb/tbb.h>
int main() {}
EOF
@ -424,7 +388,7 @@ EOF
if ! test $ROCKSDB_DISABLE_JEMALLOC; then
# Test whether jemalloc is available
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -ljemalloc \
if echo 'int main() {}' | $CXX $CFLAGS -x c++ - -o /dev/null -ljemalloc \
2>/dev/null; then
# This will enable some preprocessor identifiers in the Makefile
JEMALLOC=1
@ -445,7 +409,7 @@ EOF
fi
if ! test $JEMALLOC && ! test $ROCKSDB_DISABLE_TCMALLOC; then
# jemalloc is not available. Let's try tcmalloc
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o \
if echo 'int main() {}' | $CXX $CFLAGS -x c++ - -o /dev/null \
-ltcmalloc 2>/dev/null; then
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -ltcmalloc"
JAVA_LDFLAGS="$JAVA_LDFLAGS -ltcmalloc"
@ -454,7 +418,7 @@ EOF
if ! test $ROCKSDB_DISABLE_MALLOC_USABLE_SIZE; then
# Test whether malloc_usable_size is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <malloc.h>
int main() {
size_t res = malloc_usable_size(0);
@ -466,10 +430,10 @@ EOF
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_MALLOC_USABLE_SIZE"
fi
fi
if ! test $ROCKSDB_DISABLE_MEMKIND; then
# Test whether memkind library is installed
$CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -lmemkind 2>/dev/null <<EOF
$CXX $CFLAGS $COMMON_FLAGS -lmemkind -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <memkind.h>
int main() {
memkind_malloc(MEMKIND_DAX_KMEM, 1024);
@ -482,10 +446,10 @@ EOF
JAVA_LDFLAGS="$JAVA_LDFLAGS -lmemkind"
fi
fi
if ! test $ROCKSDB_DISABLE_PTHREAD_MUTEX_ADAPTIVE_NP; then
# Test whether PTHREAD_MUTEX_ADAPTIVE_NP mutex type is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <pthread.h>
int main() {
int x = PTHREAD_MUTEX_ADAPTIVE_NP;
@ -500,7 +464,7 @@ EOF
if ! test $ROCKSDB_DISABLE_BACKTRACE; then
# Test whether backtrace is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <execinfo.h>
int main() {
void* frames[1];
@ -512,7 +476,7 @@ EOF
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_BACKTRACE"
else
# Test whether execinfo library is installed
$CXX $PLATFORM_CXXFLAGS -lexecinfo -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -lexecinfo -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <execinfo.h>
int main() {
void* frames[1];
@ -529,7 +493,7 @@ EOF
if ! test $ROCKSDB_DISABLE_PG; then
# Test if -pg is supported
$CXX $PLATFORM_CXXFLAGS -pg -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -pg -x c++ - -o /dev/null 2>/dev/null <<EOF
int main() {
return 0;
}
@ -541,7 +505,7 @@ EOF
if ! test $ROCKSDB_DISABLE_SYNC_FILE_RANGE; then
# Test whether sync_file_range is supported for compatibility with an old glibc
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <fcntl.h>
int main() {
int fd = open("/dev/null", 0);
@ -555,7 +519,7 @@ EOF
if ! test $ROCKSDB_DISABLE_SCHED_GETCPU; then
# Test whether sched_getcpu is supported
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <sched.h>
int main() {
int cpuid = sched_getcpu();
@ -569,7 +533,7 @@ EOF
if ! test $ROCKSDB_DISABLE_AUXV_GETAUXVAL; then
# Test whether getauxval is supported
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <sys/auxv.h>
int main() {
uint64_t auxv = getauxval(AT_HWCAP);
@ -583,7 +547,7 @@ EOF
if ! test $ROCKSDB_DISABLE_ALIGNED_NEW; then
# Test whether c++17 aligned-new is supported
$CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS -faligned-new -x c++ - -o /dev/null 2>/dev/null <<EOF
struct alignas(1024) t {int a;};
int main() {}
EOF
@ -591,23 +555,13 @@ EOF
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS -faligned-new -DHAVE_ALIGNED_NEW"
fi
fi
if ! test $ROCKSDB_DISABLE_BENCHMARK; then
# Test whether google benchmark is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o /dev/null -lbenchmark -lpthread 2>/dev/null <<EOF
#include <benchmark/benchmark.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lbenchmark"
fi
fi
fi
# TODO(tec): Fix -Wshorten-64-to-32 errors on FreeBSD and enable the warning.
# -Wshorten-64-to-32 breaks compilation on FreeBSD aarch64 and i386
if ! { [ "$TARGET_OS" = FreeBSD ] && [ "$TARGET_ARCHITECTURE" = arm64 -o "$TARGET_ARCHITECTURE" = i386 ]; }; then
# -Wshorten-64-to-32 breaks compilation on FreeBSD i386
if ! [ "$TARGET_OS" = FreeBSD -a "$TARGET_ARCHITECTURE" = i386 ]; then
# Test whether -Wshorten-64-to-32 is available
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o -Wshorten-64-to-32 2>/dev/null <<EOF
$CXX $CFLAGS -x c++ - -o /dev/null -Wshorten-64-to-32 2>/dev/null <<EOF
int main() {}
EOF
if [ "$?" = 0 ]; then
@ -615,27 +569,34 @@ EOF
fi
fi
# shall we use HDFS?
if test "$USE_HDFS"; then
if test -z "$JAVA_HOME"; then
echo "JAVA_HOME has to be set for HDFS usage." >&2
exit 1
fi
HDFS_CCFLAGS="$HDFS_CCFLAGS -I$JAVA_HOME/include -I$JAVA_HOME/include/linux -DUSE_HDFS -I$HADOOP_HOME/include"
HDFS_LDFLAGS="$HDFS_LDFLAGS -lhdfs -L$JAVA_HOME/jre/lib/amd64 -L$HADOOP_HOME/lib/native"
HDFS_LDFLAGS="$HDFS_LDFLAGS -L$JAVA_HOME/jre/lib/amd64/server -L$GLIBC_RUNTIME_PATH/lib"
HDFS_LDFLAGS="$HDFS_LDFLAGS -ldl -lverify -ljava -ljvm"
COMMON_FLAGS="$COMMON_FLAGS $HDFS_CCFLAGS"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS $HDFS_LDFLAGS"
JAVA_LDFLAGS="$JAVA_LDFLAGS $HDFS_LDFLAGS"
fi
if test "0$PORTABLE" -eq 0; then
if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then
# Tune for this POWER processor, treating '+' models as base models
POWER=`LD_SHOW_AUXV=1 /bin/true | grep AT_PLATFORM | grep -E -o power[0-9]+`
COMMON_FLAGS="$COMMON_FLAGS -mcpu=$POWER -mtune=$POWER "
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
COMMON_FLAGS="$COMMON_FLAGS -march=z10 "
elif test -n "`echo $TARGET_ARCHITECTURE | grep -e^arm -e^aarch64`"; then
# TODO: Handle this with appropriate options.
COMMON_FLAGS="$COMMON_FLAGS"
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^aarch64`"; then
COMMON_FLAGS="$COMMON_FLAGS"
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
if echo 'int main() {}' | $CXX $PLATFORM_CXXFLAGS -x c++ \
-march=native - -o /dev/null 2>/dev/null; then
COMMON_FLAGS="$COMMON_FLAGS -march=native "
else
COMMON_FLAGS="$COMMON_FLAGS -march=z196 "
fi
COMMON_FLAGS="$COMMON_FLAGS"
elif test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then
RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-)
COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}"
elif [ "$TARGET_OS" == "IOS" ]; then
COMMON_FLAGS="$COMMON_FLAGS"
elif [ "$TARGET_OS" == "AIX" ] || [ "$TARGET_OS" == "SunOS" ]; then
@ -651,45 +612,6 @@ else
if test "$USE_SSE"; then
TRY_SSE_ETC="1"
fi
if test -n "`echo $TARGET_ARCHITECTURE | grep ^s390x`"; then
COMMON_FLAGS="$COMMON_FLAGS -march=z196 "
fi
if test -n "`echo $TARGET_ARCHITECTURE | grep ^riscv64`"; then
RISC_ISA=$(cat /proc/cpuinfo | grep isa | head -1 | cut --delimiter=: -f 2 | cut -b 2-)
COMMON_FLAGS="$COMMON_FLAGS -march=${RISC_ISA}"
fi
if [[ "${PLATFORM}" == "OS_MACOSX" ]]; then
# For portability compile for macOS 10.12 (2016) or newer
COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.12"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.12"
# -mmacosx-version-min must come first here.
PLATFORM_SHARED_LDFLAGS="-mmacosx-version-min=10.12 $PLATFORM_SHARED_LDFLAGS"
PLATFORM_CMAKE_FLAGS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.12"
JAVA_STATIC_DEPS_COMMON_FLAGS="-mmacosx-version-min=10.12"
JAVA_STATIC_DEPS_LDFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
JAVA_STATIC_DEPS_CCFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
JAVA_STATIC_DEPS_CXXFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
fi
fi
if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then
# check for GNU libc on ppc64
$CXX -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <stdio.h>
#include <stdlib.h>
#include <gnu/libc-version.h>
int main(int argc, char *argv[]) {
printf("GNU libc version: %s\n", gnu_get_libc_version());
return 0;
}
EOF
if [ "$?" != 0 ]; then
PPC_LIBC_IS_GNU=0
fi
fi
if test "$TRY_SSE_ETC"; then
@ -699,21 +621,14 @@ if test "$TRY_SSE_ETC"; then
# It doesn't even really check that your current CPU is compatible.
#
# SSE4.2 available since nehalem, ca. 2008-2010
# Includes POPCNT for BitsSetToOne, BitParity
TRY_SSE42="-msse4.2"
# PCLMUL available since westmere, ca. 2010-2011
TRY_PCLMUL="-mpclmul"
# AVX2 available since haswell, ca. 2013-2015
TRY_AVX2="-mavx2"
# BMI available since haswell, ca. 2013-2015
# Primarily for TZCNT for CountTrailingZeroBits
TRY_BMI="-mbmi"
# LZCNT available since haswell, ca. 2013-2015
# For FloorLog2
TRY_LZCNT="-mlzcnt"
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_SSE42 -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
#include <nmmintrin.h>
int main() {
@ -727,7 +642,7 @@ elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_PCLMUL -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_PCLMUL -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
#include <wmmintrin.h>
int main() {
@ -744,7 +659,7 @@ elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use PCLMUL intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_AVX2 -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_AVX2 -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
#include <immintrin.h>
int main() {
@ -759,35 +674,7 @@ elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use AVX2 intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_BMI -x c++ - -o test.o 2>/dev/null <<EOF
#include <cstdint>
#include <immintrin.h>
int main(int argc, char *argv[]) {
(void)argv;
return (int)_tzcnt_u64((uint64_t)argc);
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS $TRY_BMI -DHAVE_BMI"
elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use BMI intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS $TRY_LZCNT -x c++ - -o test.o 2>/dev/null <<EOF
#include <cstdint>
#include <immintrin.h>
int main(int argc, char *argv[]) {
(void)argv;
return (int)_lzcnt_u64((uint64_t)argc);
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS $TRY_LZCNT -DHAVE_LZCNT"
elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use LZCNT intrinsics, disabling" >&2
fi
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o test.o 2>/dev/null <<EOF
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
int main() {
uint64_t a = 0xffffFFFFffffFFFF;
@ -800,15 +687,31 @@ if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DHAVE_UINT128_EXTENSION"
fi
# thread_local is part of C++11 and later (TODO: clean up this define)
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SUPPORT_THREAD_LOCAL"
# iOS doesn't support thread-local storage, but this check would erroneously
# succeed because the cross-compiler flags are added by the Makefile, not this
# script.
if [ "$PLATFORM" != IOS ]; then
$CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#if defined(_MSC_VER) && !defined(__thread)
#define __thread __declspec(thread)
#endif
int main() {
static __thread int tls;
(void)tls;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SUPPORT_THREAD_LOCAL"
fi
fi
if [ "$FBCODE_BUILD" != "true" -a "$PLATFORM" = OS_LINUX ]; then
$CXX $COMMON_FLAGS $PLATFORM_SHARED_CFLAGS -x c++ -c - -o test_dl.o 2>/dev/null <<EOF
void dummy_func() {}
EOF
if [ "$?" = 0 ]; then
$CXX $COMMON_FLAGS $PLATFORM_SHARED_LDFLAGS test_dl.o -o test.o 2>/dev/null
$CXX $COMMON_FLAGS $PLATFORM_SHARED_LDFLAGS test_dl.o -o /dev/null 2>/dev/null
if [ "$?" = 0 ]; then
EXEC_LDFLAGS+="-ldl"
rm -f test_dl.o
@ -816,20 +719,6 @@ EOF
fi
fi
# check for F_FULLFSYNC
$CXX $PLATFORM_CXXFLAGS -x c++ - -o test.o 2>/dev/null <<EOF
#include <fcntl.h>
int main() {
fcntl(0, F_FULLFSYNC);
return 0;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DHAVE_FULLFSYNC"
fi
rm -f test.o test_dl.o
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
@ -841,15 +730,10 @@ ROCKSDB_PATCH=`build_tools/version.sh patch`
echo "CC=$CC" >> "$OUTPUT"
echo "CXX=$CXX" >> "$OUTPUT"
echo "AR=$AR" >> "$OUTPUT"
echo "PLATFORM=$PLATFORM" >> "$OUTPUT"
echo "PLATFORM_LDFLAGS=$PLATFORM_LDFLAGS" >> "$OUTPUT"
echo "PLATFORM_CMAKE_FLAGS=$PLATFORM_CMAKE_FLAGS" >> "$OUTPUT"
echo "JAVA_LDFLAGS=$JAVA_LDFLAGS" >> "$OUTPUT"
echo "JAVA_STATIC_LDFLAGS=$JAVA_STATIC_LDFLAGS" >> "$OUTPUT"
echo "JAVA_STATIC_DEPS_CCFLAGS=$JAVA_STATIC_DEPS_CCFLAGS" >> "$OUTPUT"
echo "JAVA_STATIC_DEPS_CXXFLAGS=$JAVA_STATIC_DEPS_CXXFLAGS" >> "$OUTPUT"
echo "JAVA_STATIC_DEPS_LDFLAGS=$JAVA_STATIC_DEPS_LDFLAGS" >> "$OUTPUT"
echo "JAVAC_ARGS=$JAVAC_ARGS" >> "$OUTPUT"
echo "VALGRIND_VER=$VALGRIND_VER" >> "$OUTPUT"
echo "PLATFORM_CCFLAGS=$PLATFORM_CCFLAGS" >> "$OUTPUT"
@ -880,9 +764,6 @@ if test -n "$WITH_JEMALLOC_FLAG"; then
echo "WITH_JEMALLOC_FLAG=$WITH_JEMALLOC_FLAG" >> "$OUTPUT"
fi
echo "LUA_PATH=$LUA_PATH" >> "$OUTPUT"
if test -n "$USE_FOLLY"; then
echo "USE_FOLLY=$USE_FOLLY" >> "$OUTPUT"
fi
if test -n "$PPC_LIBC_IS_GNU"; then
echo "PPC_LIBC_IS_GNU=$PPC_LIBC_IS_GNU" >> "$OUTPUT"
if test -n "$USE_FOLLY_DISTRIBUTED_MUTEX"; then
echo "USE_FOLLY_DISTRIBUTED_MUTEX=$USE_FOLLY_DISTRIBUTED_MUTEX" >> "$OUTPUT"
fi


@ -1,36 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Check for some simple mistakes that should prevent commit or push
BAD=""
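# Note: `git grep` exits 0 when it finds a match and 1 when it finds none,
# so a status other than 1 below means an offending line (or an error) was seen.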
git grep 'namespace rocksdb' -- '*.[ch]*'
if [ "$?" != "1" ]; then
echo "^^^^^ Do not hardcode namespace rocksdb. Use ROCKSDB_NAMESPACE"
BAD=1
fi
git grep -i 'nocommit' -- ':!build_tools/check-sources.sh'
if [ "$?" != "1" ]; then
echo "^^^^^ Code was not intended to be committed"
BAD=1
fi
git grep '<rocksdb/' -- ':!build_tools/check-sources.sh'
if [ "$?" != "1" ]; then
echo '^^^^^ Use double-quotes as in #include "rocksdb/something.h"'
BAD=1
fi
git grep 'using namespace' -- ':!build_tools' ':!docs' \
':!third-party/folly/folly/lang/Align.h' \
':!third-party/gtest-1.8.1/fused-src/gtest/gtest.h'
if [ "$?" != "1" ]; then
echo '^^^^ Do not use "using namespace"'
BAD=1
fi
if [ "$BAD" ]; then
exit 1
fi


@ -0,0 +1,19 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/5.x/centos7-native/c447969
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/1bd23f9917738974ad0ff305aa23eb5f93f18305/9.0.0/centos7-native/c9f9104
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/5.x/gcc-5-glibc-2.23/339d858
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.23/gcc-5-glibc-2.23/ca1d1c0
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/gcc-5-glibc-2.23/9bc6787
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/gcc-5-glibc-2.23/9bc6787
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/gcc-5-glibc-2.23/9bc6787
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/gcc-5-glibc-2.23/9bc6787
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/gcc-5-glibc-2.23/03859b5
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/gcc-5-glibc-2.23/9bc6787
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/gcc-5-glibc-2.23/0c8f76d
NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/gcc-5-glibc-2.23/9bc6787
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/gcc-5-glibc-2.23/b443de1
TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/gcc-5-glibc-2.23/9bc6787
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/4.0.9-36_fbk5_2933_gd092e3f/gcc-5-glibc-2.23/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/2e3cb7d119b3cea5f1e738cc13a1ac69f49eb875/2.29.1/centos7-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/gcc-5-glibc-2.23/9bc6787
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.2.3/gcc-5-glibc-2.23/65372bd


@ -0,0 +1,20 @@
# shellcheck disable=SC2148
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
GCC_BASE=/mnt/gvfs/third-party2/gcc/cf7d14c625ce30bae1a4661c2319c5a283e4dd22/4.8.1/centos6-native/cc6c9dc
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/8598c375b0e94e1448182eb3df034704144a838d/stable/centos6-native/3f16ddd
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/d6e0a7da6faba45f5e5b1638f9edd7afc2f34e7d/4.8.1/gcc-4.8.1-glibc-2.17/8aac7fc
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/d282e6e8f3d20f4e40a516834847bdc038e07973/2.17/gcc-4.8.1-glibc-2.17/99df8fc
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/8c38a4c1e52b4c2cc8a9cdc31b9c947ed7dbfcb4/1.1.3/gcc-4.8.1-glibc-2.17/c3f970a
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/0882df3713c7a84f15abe368dc004581f20b39d7/1.2.8/gcc-4.8.1-glibc-2.17/c3f970a
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/740325875f6729f42d28deaa2147b0854f3a347e/1.0.6/gcc-4.8.1-glibc-2.17/c3f970a
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0e790b441e2d9acd68d51e1d2e028f88c6a79ddf/r131/gcc-4.8.1-glibc-2.17/c3f970a
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/9455f75ff7f4831dc9fda02a6a0f8c68922fad8f/1.0.0/gcc-4.8.1-glibc-2.17/c3f970a
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/f001a51b2854957676d07306ef3abf67186b5c8b/2.1.1/gcc-4.8.1-glibc-2.17/c3f970a
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/fc8a13ca1fffa4d0765c716c5a0b49f0c107518f/master/gcc-4.8.1-glibc-2.17/8d31e51
NUMA_BASE=/mnt/gvfs/third-party2/numa/17c514c4d102a25ca15f4558be564eeed76f4b6a/2.0.8/gcc-4.8.1-glibc-2.17/c3f970a
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/ad576de2a1ea560c4d3434304f0fc4e079bede42/trunk/gcc-4.8.1-glibc-2.17/675d945
TBB_BASE=/mnt/gvfs/third-party2/tbb/9d9a554877d0c5bef330fe818ab7178806dd316a/4.0_update2/gcc-4.8.1-glibc-2.17/c3f970a
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/7c111ff27e0c466235163f00f280a9d617c3d2ec/4.0.9-36_fbk5_2933_gd092e3f/gcc-4.8.1-glibc-2.17/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/b7fd454c4b10c6a81015d4524ed06cdeab558490/2.26/centos6-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d7f4d4d86674a57668e3a96f76f0e17dd0eb8765/3.8.1/gcc-4.8.1-glibc-2.17/c3f970a
LUA_BASE=/mnt/gvfs/third-party2/lua/61e4abf5813bbc39bc4f548757ccfcadde175a48/5.2.3/centos6-native/730f94e


@ -0,0 +1,20 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/7.x/centos7-native/b2ef2b6
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/963d9aeda70cc4779885b1277484fe7544a04e3e/9.0.0/platform007/9e92d53/
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/7.x/platform007/5620abc
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.26/platform007/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/platform007/ca4da3d
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/platform007/ca4da3d
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/platform007/15a3614
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/platform007/c26c002
NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/platform007/ca4da3d
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/platform007/6f3e0a9
TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/platform007/ca4da3d
LIBURING_BASE=/mnt/gvfs/third-party2/liburing/79427253fd0d42677255aacfe6d13bfe63f752eb/20190828/platform007/ca4da3d
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/fb/platform007/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/ab9f09bba370e7066cafd4eb59752db93f2e8312/2.29.1/platform007/15a3614
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/platform007/ca4da3d
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832


@ -1,22 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
GCC_BASE=/mnt/gvfs/third-party2/gcc/1795efe5f06778c15a92c8f9a2aba5dc496d9d4d/9.x/centos7-native/3bed279
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/7318eaac22659b6ff2fe43918e4b69fd0772a8a7/9.0.0/platform009/651ee30
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/4959b39cfbe5965a37c861c4c327fa7c5c759b87/9.x/platform009/9202ce7
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/45ce3375cdc77ecb2520bbf8f0ecddd3f98efd7a/2.30/platform009/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/be4de3205e029101b18aa8103daa696c2bef3b19/1.1.3/platform009/7f3b187
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/3c160ac5c67e257501e24c6c1d00ad5e01d73db6/1.2.8/platform009/7f3b187
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/73a237ac5bc0a5f5d67b39b8d253cfebaab88684/1.0.6/platform009/7f3b187
LZ4_BASE=/mnt/gvfs/third-party2/lz4/ec6573523b0ce55ef6373a4801189027cf07bb2c/1.9.1/platform009/7f3b187
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/64c58a207d2495e83abc57a500a956df09b79a7c/1.4.x/platform009/ba86d1f
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/824d0a8a5abb5b121afd1b35fc3896407ea50092/2.2.0/platform009/7f3b187
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/d9aef9feb850b168a68736420f217b01cce11a89/master/platform009/c305944
NUMA_BASE=/mnt/gvfs/third-party2/numa/0af65f71e23a67bf65dc91b11f95caa39325c432/2.0.11/platform009/7f3b187
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/02486dac347645d31dce116f44e1de3177315be2/1.4/platform009/5191652
TBB_BASE=/mnt/gvfs/third-party2/tbb/2e0ec671e550bfca347300bf3f789d9c0fff24ad/2018_U5/platform009/7f3b187
LIBURING_BASE=/mnt/gvfs/third-party2/liburing/70dbd9cfee63a25611417d09433a86d7711b3990/20200729/platform009/7f3b187
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/32b8a2407b634df3f8f948ba373fc4acc6a18296/fb/platform009/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/08634589372fa5f237bfd374e8c644a8364e78c1/2.32/platform009/ba86d1f/
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/6ae525939ad02e5e676855082fbbc7828dbafeac/3.15.0/platform009/7f3b187
LUA_BASE=/mnt/gvfs/third-party2/lua/162efd9561a3d21f6869f4814011e9cf1b3ff4dc/5.3.4/platform009/a6271c4
BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/30bf49ad6414325e17f3425b0edcb64239427ae3/1.6.1/platform009/7f3b187
BOOST_BASE=/mnt/gvfs/third-party2/boost/201b7d74941e54b436dfa364a063aa6d2cd7de4c/1.69.0/platform009/8a7ffdf

View File

@ -1,22 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# The file is generated using update_dependencies.sh.
GCC_BASE=/mnt/gvfs/third-party2/gcc/e40bde78650fa91b8405a857e3f10bf336633fb0/11.x/centos7-native/886b5eb
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/2043340983c032915adbb6f78903dc855b65aee8/12/platform010/9520e0f
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/c00dcc6a3e4125c7e8b248e9a79c14b78ac9e0ca/11.x/platform010/5684a5a
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0b9c8e4b060eda62f3bc1c6127bbe1256697569b/2.34/platform010/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/bc9647f7912b131315827d65cb6189c21f381d05/1.1.3/platform010/76ebdda
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/a6f5f3f1d063d2d00cd02fc12f0f05fc3ab3a994/1.2.11/platform010/76ebdda
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/09703139cfc376bd8a82642385a0e97726b28287/1.0.6/platform010/76ebdda
LZ4_BASE=/mnt/gvfs/third-party2/lz4/60220d6a5bf7722b9cc239a1368c596619b12060/1.9.1/platform010/76ebdda
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/50eace8143eaaea9473deae1f3283e0049e05633/1.4.x/platform010/64091f4
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/5d27e5919771603da06000a027b12f799e58a4f7/2.2.0/platform010/76ebdda
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/b62912d333ef33f9760efa6219dbe3fe6abb3b0e/master/platform010/f57cc4a
NUMA_BASE=/mnt/gvfs/third-party2/numa/6b412770957aa3c8a87e5e0dcd8cc2f45f393bc0/2.0.11/platform010/76ebdda
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/52f69816e936e147664ad717eb71a1a0e9dc973a/1.4/platform010/5074a48
TBB_BASE=/mnt/gvfs/third-party2/tbb/c9cc192099fa84c0dcd0ffeedd44a373ad6e4925/2018_U5/platform010/76ebdda
LIBURING_BASE=/mnt/gvfs/third-party2/liburing/a98e2d137007e3ebf7f33bd6f99c2c56bdaf8488/20210212/platform010/76ebdda
BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/780c7a0f9cf0967961e69ad08e61cddd85d61821/trunk/platform010/76ebdda
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/02d9f76aaaba580611cf75e741753c800c7fdc12/fb/platform010/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/938dc3f064ef3a48c0446f5b11d788d50b3eb5ee/2.37/centos7-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/429a6b3203eb415f1599bd15183659153129188e/3.15.0/platform010/76ebdda
LUA_BASE=/mnt/gvfs/third-party2/lua/363787fa5cac2a8aa20638909210443278fa138e/5.3.4/platform010/9079c97

View File

@ -21,48 +21,38 @@ LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
if test -z $PIC_BUILD; then
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
fi
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
fi
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
fi
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
else
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY"
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
else
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY"
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
@ -119,7 +109,6 @@ if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS/gold"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
@ -130,7 +119,6 @@ else
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
@ -172,4 +160,6 @@ else
LUA_LIB=" $LUA_PATH/lib/liblua_pic.a"
fi
USE_FOLLY_DISTRIBUTED_MUTEX=1
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

View File

@ -0,0 +1,118 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ compiler and also
# uses jemalloc
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_4.8.1.sh"
# location of libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
# location of glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
# location of snappy headers and libraries
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
# location of bzip headers and libraries
BZIP2_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP2_LIBS=" $BZIP2_BASE/lib/libbz2.a"
LZ4_INCLUDE=" -I $LZ4_BASE/include"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
ZSTD_INCLUDE=" -I $ZSTD_BASE/include"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include"
JEMALLOC_LIB="$JEMALLOC_BASE/lib/libjemalloc.a"
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
# location of tbb
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
TBB_LIBS="$TBB_BASE/lib/libtbb.a"
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP2_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
CFLAGS="-B$BINUTILS/gold -m64 -mtune=generic"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
JEMALLOC=1
else
# clang
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_INCLUDE="$CLANG_LIB/clang/*/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include/"
CFLAGS="-B$BINUTILS/gold -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.8.1 "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/4.8.1/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CXXFLAGS="-nostdinc++"
fi
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42"
CFLAGS+=" -DSNAPPY -DGFLAGS=google -DZLIB -DBZIP2 -DLZ4 -DZSTD -DNUMA -DTBB"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-4.8.1-glibc-2.17/lib"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP2_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
LUA_PATH="$LUA_BASE"
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE LUA_PATH

View File

@ -9,87 +9,90 @@
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_platform009.sh"
source "$BASEDIR/dependencies_platform007.sh"
CFLAGS=""
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/9.3.0"
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
MAYBE_PIC=
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
MAYBE_PIC=_pic
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
CFLAGS+=" -DSNAPPY"
fi
if ! test $ROCKSDB_DISABLE_ZLIB; then
if test -z $PIC_BUILD; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
CFLAGS+=" -DZSTD"
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
else
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD"
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags${MAYBE_PIC}.a"
if test -z $PIC_BUILD; then
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags.a"
else
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags_pic.a"
fi
CFLAGS+=" -DGFLAGS=gflags"
BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/"
BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a"
BOOST_INCLUDE=" -I $BOOST_BASE/include/"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc.a"
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma${MAYBE_PIC}.a"
CFLAGS+=" -DNUMA"
if test -z $PIC_BUILD; then
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind${MAYBE_PIC}.a"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind.a"
fi
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
TBB_LIBS="$TBB_BASE/lib/libtbb${MAYBE_PIC}.a"
if test -z $PIC_BUILD; then
TBB_LIBS="$TBB_BASE/lib/libtbb.a"
else
TBB_LIBS="$TBB_BASE/lib/libtbb_pic.a"
fi
CFLAGS+=" -DTBB"
# location of LIBURING
LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/"
LIBURING_LIBS="$LIBURING_BASE/lib/liburing${MAYBE_PIC}.a"
if test -z $PIC_BUILD; then
LIBURING_LIBS="$LIBURING_BASE/lib/liburing.a"
else
LIBURING_LIBS="$LIBURING_BASE/lib/liburing_pic.a"
fi
CFLAGS+=" -DLIBURING"
test "$USE_SSE" || USE_SSE=1
@ -99,9 +102,8 @@ export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
AS="$BINUTILS/as"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE $BOOST_INCLUDE"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
@ -110,15 +112,14 @@ CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/clang/tools/scan-build/bin/scan-build"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/tools/clang/tools/scan-build/bin/scan-build"
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS"
CFLAGS+=" -B$BINUTILS/gold"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
JEMALLOC=1
@ -127,13 +128,12 @@ else
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/9.x "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/9.x/x86_64-facebook-linux "
CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
@ -147,18 +147,17 @@ CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform009/lib/ld.so"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS"
EXEC_LDFLAGS+=" -B$BINUTILS/gold"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform009/lib"
EXEC_LDFLAGS+=" -Wl,-rpath=$GCC_BASE/lib64"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
PLATFORM_LDFLAGS+=" -B$BINUTILS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
@ -166,4 +165,6 @@ VALGRIND_VER="$VALGRIND_BASE/bin/"
LUA_PATH=
LUA_LIB=
export CC CXX AR AS CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
USE_FOLLY_DISTRIBUTED_MUTEX=1
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

View File

@ -1,175 +0,0 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ and clang compilers and also
# uses jemalloc
# Environment variables that change the behavior of this script:
# PIC_BUILD -- if true, it will only take PIC versions of libraries from fbcode. Libraries that don't have a PIC variant will not be included
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_platform010.sh"
# Disallow using libraries from default locations as they might not be compatible with platform010 libraries.
CFLAGS=" --sysroot=/DOES/NOT/EXIST"
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/trunk"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib -B$LIBGCC_BASE/lib/gcc/x86_64-facebook-linux/trunk/"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
GLIBC_LIBS+=" -B$GLIBC_BASE/lib"
if test -z $PIC_BUILD; then
MAYBE_PIC=
else
MAYBE_PIC=_pic
fi
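# PIC (position-independent) objects are required when these static archives
# are linked into a shared library; the non-PIC variants are preferred for
# plain executables. Each library below selects one or the other through the
# ${MAYBE_PIC} suffix, e.g. libsnappy.a vs. libsnappy_pic.a.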
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
CFLAGS+=" -DSNAPPY"
fi
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
CFLAGS+=" -DLZ4"
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
CFLAGS+=" -DZSTD"
fi
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags${MAYBE_PIC}.a"
CFLAGS+=" -DGFLAGS=gflags"
BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/"
BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma${MAYBE_PIC}.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind${MAYBE_PIC}.a"
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
TBB_LIBS="$TBB_BASE/lib/libtbb${MAYBE_PIC}.a"
CFLAGS+=" -DTBB"
# location of LIBURING
LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/"
LIBURING_LIBS="$LIBURING_BASE/lib/liburing${MAYBE_PIC}.a"
CFLAGS+=" -DLIBURING"
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
AS="$BINUTILS/as"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/clang/tools/scan-build/bin/scan-build"
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -I$GCC_BASE/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/install-tools/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include-fixed/"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -I$GLIBC_INCLUDE"
CFLAGS+=" -I$LIBGCC_BASE/include"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/x86_64-facebook-linux/"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/backward"
CFLAGS+=" -isystem $GLIBC_INCLUDE -I$GLIBC_INCLUDE"
JEMALLOC=1
else
# clang
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -Wno-expansion-to-defined "
CXXFLAGS="-nostdinc++"
fi
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform010/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform010/lib"
EXEC_LDFLAGS+=" -Wl,-rpath=$GCC_BASE/lib64"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
PLATFORM_LDFLAGS+=" -B$BINUTILS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
export CC CXX AR AS CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

View File

@ -38,9 +38,7 @@ if [ "$CLANG_FORMAT_DIFF" ]; then
fi
else
# First try directly executing the possibilities
if clang-format-diff --help &> /dev/null < /dev/null; then
CLANG_FORMAT_DIFF=clang-format-diff
elif clang-format-diff.py --help &> /dev/null < /dev/null; then
if clang-format-diff.py --help &> /dev/null < /dev/null; then
CLANG_FORMAT_DIFF=clang-format-diff.py
elif $REPO_ROOT/clang-format-diff.py --help &> /dev/null < /dev/null; then
CLANG_FORMAT_DIFF=$REPO_ROOT/clang-format-diff.py
@ -54,16 +52,15 @@ else
else
echo "You didn't have clang-format-diff.py and/or clang-format available in your computer!"
echo "You can download clang-format-diff.py by running: "
echo " curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py"
echo "You should make sure the downloaded script is not compromised."
echo " curl --location http://goo.gl/iUW1u2 -o ${CLANG_FORMAT_DIFF}"
echo "You can download clang-format by running:"
echo " brew install clang-format"
echo " Or"
echo " apt install clang-format"
echo " This might work too:"
echo " yum install git-clang-format"
echo "Then make sure clang-format is available and executable from \$PATH:"
echo " clang-format --version"
echo "Then, move both files (i.e. ${CLANG_FORMAT_DIFF} and clang-format) to some directory within PATH=${PATH}"
echo "and make sure ${CLANG_FORMAT_DIFF} is executable."
exit 128
fi
# Check argparse pre-req on interpreter, or it will fail
@ -78,16 +75,17 @@ else
exit 129
fi
# Unfortunately, some machines have a Python2 clang-format-diff.py
# installed but only a Python3 interpreter installed. Unfortunately,
# automatic 2to3 migration is insufficient, so suggest downloading latest.
# installed but only a Python3 interpreter installed. Rather than trying
# different Python versions that might be installed, we can try migrating
# the code to Python3 if it looks like Python2
if grep -q "print '" "$CFD_PATH" && \
${PYTHON:-python3} --version | grep -q 'ython 3'; then
echo "You have clang-format-diff.py for Python 2 but are using a Python 3"
echo "interpreter (${PYTHON:-python3})."
echo "You can download clang-format-diff.py for Python 3 by running: "
echo " curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py"
echo "You should make sure the downloaded script is not compromised."
exit 130
if [ ! -f "$REPO_ROOT/.py3/clang-format-diff.py" ]; then
echo "Migrating $CFD_PATH to Python3 in a hidden file"
mkdir -p "$REPO_ROOT/.py3"
${PYTHON:-python3} -m lib2to3 -w -n -o "$REPO_ROOT/.py3" "$CFD_PATH" > /dev/null || exit 128
fi
CFD_PATH="$REPO_ROOT/.py3/clang-format-diff.py"
fi
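# lib2to3 ships with the CPython standard library (removed in Python 3.13),
# so the migration above needs no extra packages; the converted script is
# cached under .py3/ and reused on later runs.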
CLANG_FORMAT_DIFF="${PYTHON:-python3} $CFD_PATH"
# This had better work after all those checks
@ -122,27 +120,25 @@ uncommitted_code=`git diff HEAD`
# If there are no uncommitted changes, we assume the user is doing a post-commit
# format check, in which case we'll try to check the modified lines vs. the
# facebook/rocksdb.git main branch. Otherwise, we'll check format of the
# facebook/rocksdb.git master branch. Otherwise, we'll check format of the
# uncommitted code only.
if [ -z "$uncommitted_code" ]
then
# Attempt to get name of facebook/rocksdb.git remote.
[ "$FORMAT_REMOTE" ] || FORMAT_REMOTE="$(LC_ALL=POSIX LANG=POSIX git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)"
[ "$FORMAT_REMOTE" ] || FORMAT_REMOTE="$(git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)"
# Fall back on 'origin' if that fails
[ "$FORMAT_REMOTE" ] || FORMAT_REMOTE=origin
# Use main branch from that remote
[ "$FORMAT_UPSTREAM" ] || FORMAT_UPSTREAM="$FORMAT_REMOTE/$(LC_ALL=POSIX LANG=POSIX git remote show $FORMAT_REMOTE | sed -n '/HEAD branch/s/.*: //p')"
# Use master branch from that remote
[ "$FORMAT_UPSTREAM" ] || FORMAT_UPSTREAM="$FORMAT_REMOTE/master"
# Get the common ancestor with that remote branch. Everything after that
# common ancestor would be considered the contents of a pull request, so
# should be relevant for formatting fixes.
FORMAT_UPSTREAM_MERGE_BASE="$(git merge-base "$FORMAT_UPSTREAM" HEAD)"
# Get the differences
diffs=$(git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -p 1)
echo "Checking format of changes not yet in $FORMAT_UPSTREAM..."
else
# Check the format of uncommitted lines,
diffs=$(git diff -U0 HEAD | $CLANG_FORMAT_DIFF -p 1)
echo "Checking format of uncommitted changes..."
fi
if [ -z "$diffs" ]

View File

@ -1561,7 +1561,6 @@ sub save_stdin_stdout_stderr {
::die_bug("Can't dup STDERR: $!");
open $Global::original_stdin, "<&", "STDIN" or
::die_bug("Can't dup STDIN: $!");
$Global::is_terminal = (-t $Global::original_stderr) && !$ENV{'CIRCLECI'} && !$ENV{'TRAVIS'};
}
sub enough_file_handles {
@ -1841,17 +1840,12 @@ sub start_another_job {
}
}
$opt::min_progress_interval = 0;
sub init_progress {
# Uses:
# $opt::bar
# Returns:
# list of computers for progress output
$|=1;
if (not $Global::is_terminal) {
$opt::min_progress_interval = 30;
}
if($opt::bar) {
return("","");
}
@ -1876,9 +1870,6 @@ sub drain_job_queue {
}
my $last_header="";
my $sleep = 0.2;
my $last_left = 1000000000;
my $last_progress_time = 0;
my $ps_reported = 0;
do {
while($Global::total_running > 0) {
debug($Global::total_running, "==", scalar
@ -1889,38 +1880,14 @@ sub drain_job_queue {
close $job->fh(0,"w");
}
}
# When not connected to terminal, assume CI (e.g. CircleCI). In
# that case we want occasional progress output to prevent abort
# due to timeout with no output, but we also need to stop sending
# progress output if there has been no actual progress, so that
# the job can time out appropriately (CircleCI: 10m) in case of
# a hung test. But without special output, it is extremely
# annoying to diagnose which test is hung, so we add that using
# `ps` below.
if($opt::progress and
($Global::is_terminal or (time() - $last_progress_time) >= 30)) {
if($opt::progress) {
my %progress = progress();
if($last_header ne $progress{'header'}) {
print $Global::original_stderr "\n", $progress{'header'}, "\n";
$last_header = $progress{'header'};
}
if ($Global::is_terminal) {
print $Global::original_stderr "\r",$progress{'status'};
}
if ($last_left > $Global::left) {
if (not $Global::is_terminal) {
print $Global::original_stderr $progress{'status'},"\n";
}
$last_progress_time = time();
$ps_reported = 0;
} elsif (not $ps_reported and (time() - $last_progress_time) >= 60) {
# No progress in at least 60 seconds: run ps
print $Global::original_stderr "\n";
system("ps", "-wf");
$ps_reported = 1;
}
$last_left = $Global::left;
flush $Global::original_stderr;
print $Global::original_stderr "\r",$progress{'status'};
flush $Global::original_stderr;
}
if($Global::total_running < $Global::max_jobs_running
and not $Global::JobQueue->empty()) {
@ -1954,7 +1921,7 @@ sub drain_job_queue {
not $Global::start_no_new_jobs and not $Global::JobQueue->empty());
if($opt::progress) {
my %progress = progress();
print $Global::original_stderr $opt::progress_sep, $progress{'status'}, "\n";
print $Global::original_stderr "\r", $progress{'status'}, "\n";
flush $Global::original_stderr;
}
}
@ -1987,11 +1954,10 @@ sub progress {
my $eta = "";
my ($status,$header)=("","");
if($opt::eta) {
my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
compute_eta();
$eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
$this_eta, $left, $avgtime);
$Global::left = $left;
my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
compute_eta();
$eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
$this_eta, $left, $avgtime);
}
my $termcols = terminal_columns();
my @workers = sort keys %Global::host;

View File

@ -103,26 +103,31 @@ function main() {
gem_install fpm
make static_lib
LIBDIR=/usr/lib
if [[ $FPM_OUTPUT = "rpm" ]]; then
LIBDIR=$(rpm --eval '%_libdir')
fi
make install INSTALL_PATH=package
rm -rf package
make install DESTDIR=package PREFIX=/usr LIBDIR=$LIBDIR
cd package
LIB_DIR=lib
if [[ -z "$ARCH" ]]; then
ARCH=$(getconf LONG_BIT)
fi
if [[ ("$FPM_OUTPUT" = "rpm") && ($ARCH -eq 64) ]]; then
mv lib lib64
LIB_DIR=lib64
fi
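# rpm-based distros expect 64-bit libraries under lib64 (cf. rpm --eval
# '%_libdir' above), while deb packages keep them under lib, hence the
# conditional rename.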
fpm \
-s dir \
-t $FPM_OUTPUT \
-C package \
-n rocksdb \
-v $1 \
--prefix /usr \
--url http://rocksdb.org/ \
-m rocksdb@fb.com \
--license BSD \
--vendor Facebook \
--description "RocksDB is an embeddable persistent key-value store for fast storage." \
usr
include $LIB_DIR
}
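# Hypothetical invocation (the package version is passed as $1 above):
#   FPM_OUTPUT=rpm ./make_package.sh 7.0.0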
# shellcheck disable=SC2068

209 build_tools/precommit_checker.py Executable file
View File

@ -0,0 +1,209 @@
#!/usr/bin/env python2.7
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import commands
import subprocess
import sys
import re
import os
import time
#
# Simple logger
#
class Log:
def __init__(self, filename):
self.filename = filename
self.f = open(self.filename, 'w+', 0)
def caption(self, str):
line = "\n##### %s #####\n" % str
if self.f:
self.f.write("%s \n" % line)
else:
print(line)
def error(self, str):
data = "\n\n##### ERROR ##### %s" % str
if self.f:
self.f.write("%s \n" % data)
else:
print(data)
def log(self, str):
if self.f:
self.f.write("%s \n" % str)
else:
print(str)
#
# Shell Environment
#
class Env(object):
def __init__(self, logfile, tests):
self.tests = tests
self.log = Log(logfile)
def shell(self, cmd, path=os.getcwd()):
if path:
os.chdir(path)
self.log.log("==== shell session ===========================")
self.log.log("%s> %s" % (path, cmd))
status = subprocess.call("cd %s; %s" % (path, cmd), shell=True,
stdout=self.log.f, stderr=self.log.f)
self.log.log("status = %s" % status)
self.log.log("============================================== \n\n")
return status
def GetOutput(self, cmd, path=os.getcwd()):
if path:
os.chdir(path)
self.log.log("==== shell session ===========================")
self.log.log("%s> %s" % (path, cmd))
status, out = commands.getstatusoutput(cmd)
self.log.log("status = %s" % status)
self.log.log("out = %s" % out)
self.log.log("============================================== \n\n")
return status, out
#
# Pre-commit checker
#
class PreCommitChecker(Env):
def __init__(self, args):
Env.__init__(self, args.logfile, args.tests)
self.ignore_failure = args.ignore_failure
#
# Get commands for a given job from the determinator file
#
def get_commands(self, test):
status, out = self.GetOutput(
"RATIO=1 build_tools/rocksdb-lego-determinator %s" % test, ".")
return status, out
#
# Run a specific CI job
#
def run_test(self, test):
self.log.caption("Running test %s locally" % test)
# get commands for the CI job determinator
status, cmds = self.get_commands(test)
if status != 0:
self.log.error("Error getting commands for test %s" % test)
return False
# Parse the JSON to extract the commands to run
cmds = re.findall("'shell':'([^\']*)'", cmds)
if len(cmds) == 0:
self.log.log("No commands found")
return False
# Run commands
for cmd in cmds:
# Replace J=<..> with the local environment variable
if "J" in os.environ:
cmd = cmd.replace("J=1", "J=%s" % os.environ["J"])
cmd = cmd.replace("make ", "make -j%s " % os.environ["J"])
# Run the command
status = self.shell(cmd, ".")
if status != 0:
self.log.error("Error running command %s for test %s"
% (cmd, test))
return False
return True
#
# Run specified CI jobs
#
def run_tests(self):
if not self.tests:
self.log.error("Invalid args. Please provide tests")
return False
self.print_separator()
self.print_row("TEST", "RESULT")
self.print_separator()
result = True
for test in self.tests:
start_time = time.time()
self.print_test(test)
result = self.run_test(test)
elapsed_min = (time.time() - start_time) / 60
if not result:
self.log.error("Error running test %s" % test)
self.print_result("FAIL (%dm)" % elapsed_min)
if not self.ignore_failure:
return False
result = False
else:
self.print_result("PASS (%dm)" % elapsed_min)
self.print_separator()
return result
#
# Print a line
#
def print_separator(self):
print("".ljust(60, "-"))
#
# Print two columns
#
def print_row(self, c0, c1):
print("%s%s" % (c0.ljust(40), c1.ljust(20)))
def print_test(self, test):
print(test.ljust(40), end="")
sys.stdout.flush()
def print_result(self, result):
print(result.ljust(20))
#
# Main
#
parser = argparse.ArgumentParser(description='RocksDB pre-commit checker.')
# --log <logfile>
parser.add_argument('--logfile', default='/tmp/precommit-check.log',
help='Log file. Default is /tmp/precommit-check.log')
# --ignore_failure
parser.add_argument('--ignore_failure', action='store_true', default=False,
help='Continue running the remaining tests even when an error occurs')
# <test ....>
parser.add_argument('tests', nargs='+',
help='CI test(s) to run. e.g: unit punit asan tsan ubsan')
args = parser.parse_args()
checker = PreCommitChecker(args)
print("Please follow log %s" % checker.log.filename)
if not checker.run_tests():
print("Error running tests. Please check log file %s"
% checker.log.filename)
sys.exit(1)
sys.exit(0)

View File

@ -20,11 +20,26 @@ STAT_FILE=${STAT_FILE:-$(mktemp -t -u rocksdb_test_stats_XXXX)}
function cleanup {
rm -rf $DATA_DIR
rm -f $STAT_FILE.*
rm -f $STAT_FILE.fillseq
rm -f $STAT_FILE.readrandom
rm -f $STAT_FILE.overwrite
rm -f $STAT_FILE.memtablefillreadrandom
}
trap cleanup EXIT
if [ -z $GIT_BRANCH ]; then
git_br=`git rev-parse --abbrev-ref HEAD`
else
git_br=$(basename $GIT_BRANCH)
fi
if [ $git_br == "master" ]; then
git_br=""
else
git_br="."$git_br
fi
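# The branch suffix computed above is appended to the ODS entity name (see
# send_to_ods below), so results from non-master branches are reported under
# e.g. rocksdb_build.mybranch rather than rocksdb_build.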
make release
# measure fillseq + fill up the DB for overwrite benchmark
@ -258,6 +273,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--value_size=100 \
--compression_type=none \
--compression_ratio=1 \
--hard_rate_limit=2 \
--write_buffer_size=134217728 \
--max_write_buffer_number=4 \
--level0_file_num_compaction_trigger=8 \
@ -270,10 +286,12 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--sync=0 \
--verify_checksum=1 \
--delete_obsolete_files_period_micros=314572800 \
--max_grandparent_overlap_factor=10 \
--use_plain_table=1 \
--open_files=-1 \
--mmap_read=1 \
--mmap_write=0 \
--memtablerep=prefix_hash \
--bloom_bits=10 \
--bloom_locality=1 \
--perf_level=0"
@ -360,7 +378,7 @@ function send_to_ods {
echo >&2 "ERROR: Key $key doesn't have a value."
return
fi
curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value" \
curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build$git_br&key=$key&value=$value" \
--connect-timeout 60
}

File diff suppressed because it is too large

View File

@ -42,7 +42,7 @@ $RunOnly.Add("c_test") | Out-Null
$RunOnly.Add("compact_on_deletion_collector_test") | Out-Null
$RunOnly.Add("merge_test") | Out-Null
$RunOnly.Add("stringappend_test") | Out-Null # Apparently incorrectly written
$RunOnly.Add("backup_engine_test") | Out-Null # Disabled
$RunOnly.Add("backupable_db_test") | Out-Null # Disabled
$RunOnly.Add("timer_queue_test") | Out-Null # Not a gtest
if($RunAll -and $SuiteRun -ne "") {
@ -68,7 +68,7 @@ $BinariesFolder = -Join($RootFolder, "\build\Debug\")
if($WorkFolder -eq "") {
# If TEST_TMPDIR is set use it
# If TEST_TMPDIR is set use it
[string]$var = $Env:TEST_TMPDIR
if($var -eq "") {
$WorkFolder = -Join($RootFolder, "\db_tests\")
@ -93,7 +93,7 @@ $ExcludeCasesSet = New-Object System.Collections.Generic.HashSet[string]
if($ExcludeCases -ne "") {
Write-Host "ExcludeCases: $ExcludeCases"
$l = $ExcludeCases -split ' '
ForEach($t in $l) {
ForEach($t in $l) {
$ExcludeCasesSet.Add($t) | Out-Null
}
}
@ -102,7 +102,7 @@ $ExcludeExesSet = New-Object System.Collections.Generic.HashSet[string]
if($ExcludeExes -ne "") {
Write-Host "ExcludeExe: $ExcludeExes"
$l = $ExcludeExes -split ' '
ForEach($t in $l) {
ForEach($t in $l) {
$ExcludeExesSet.Add($t) | Out-Null
}
}
@ -118,10 +118,6 @@ if($ExcludeExes -ne "") {
# MultiThreaded/MultiThreadedDBTest.
# MultiThreaded/0 # GetParam() = 0
# MultiThreaded/1 # GetParam() = 1
# RibbonTypeParamTest/0. # TypeParam = struct DefaultTypesAndSettings
# CompactnessAndBacktrackAndFpRate
# Extremes
# FindOccupancyForSuccessRate
#
# into this:
#
@ -129,9 +125,6 @@ if($ExcludeExes -ne "") {
# DBTest.WriteEmptyBatch
# MultiThreaded/MultiThreadedDBTest.MultiThreaded/0
# MultiThreaded/MultiThreadedDBTest.MultiThreaded/1
# RibbonTypeParamTest/0.CompactnessAndBacktrackAndFpRate
# RibbonTypeParamTest/0.Extremes
# RibbonTypeParamTest/0.FindOccupancyForSuccessRate
#
# Output into the parameter in a form TestName -> Log File Name
function ExtractTestCases([string]$GTestExe, $HashTable) {
@ -145,8 +138,6 @@ function ExtractTestCases([string]$GTestExe, $HashTable) {
ForEach( $l in $Tests) {
# remove trailing comment if any
$l = $l -replace '\s+\#.*',''
# Leading whitespace is fine
$l = $l -replace '^\s+',''
# Trailing dot is a test group but no whitespace
@ -155,7 +146,8 @@ function ExtractTestCases([string]$GTestExe, $HashTable) {
} else {
# Otherwise it is a test name, remove leading space
$test = $l
# create a log name
# remove trailing comment if any and create a log name
$test = $test -replace '\s+\#.*',''
$test = "$Group$test"
if($ExcludeCasesSet.Contains($test)) {
@ -261,7 +253,7 @@ if($Run -ne "") {
$DiscoveredExe = @()
dir -Path $search_path | ForEach-Object {
$DiscoveredExe += ($_.Name)
$DiscoveredExe += ($_.Name)
}
# Remove exclusions
@ -301,7 +293,7 @@ if($SuiteRun -ne "") {
$ListOfExe = @()
dir -Path $search_path | ForEach-Object {
$ListOfExe += ($_.Name)
$ListOfExe += ($_.Name)
}
# Exclude those in RunOnly from running as suites
@ -356,7 +348,7 @@ function RunJobs($Suites, $TestCmds, [int]$ConcurrencyVal)
# Wait for all to finish and get the results
while(($JobToLog.Count -gt 0) -or
($TestCmds.Count -gt 0) -or
($TestCmds.Count -gt 0) -or
($Suites.Count -gt 0)) {
# Make sure we have maximum concurrent jobs running if anything
@ -476,8 +468,8 @@ RunJobs -Suites $CasesToRun -TestCmds $TestExes -ConcurrencyVal $Concurrency
$EndDate = (Get-Date)
New-TimeSpan -Start $StartDate -End $EndDate |
ForEach-Object {
New-TimeSpan -Start $StartDate -End $EndDate |
ForEach-Object {
"Elapsed time: {0:g}" -f $_
}
@ -491,3 +483,5 @@ if(!$script:success) {
}
exit 0

View File

@ -9,7 +9,6 @@ OUTPUT=""
function log_header()
{
echo "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved." >> "$OUTPUT"
echo "# The file is generated using update_dependencies.sh." >> "$OUTPUT"
}
@ -19,7 +18,7 @@ function log_variable()
}
TP2_LATEST="/data/users/$USER/fbsource/fbcode/third-party2/"
TP2_LATEST="/mnt/vol/engshare/fbcode/third-party2"
## $1 => lib name
## $2 => lib version (if not provided, will try to pick latest)
## $3 => platform (if not provided, will try to pick latest gcc)
@ -51,8 +50,6 @@ function get_lib_base()
fi
result=`ls -1d $result/*/ | head -n1`
echo Finding link $result
# lib_name => LIB_NAME_BASE
local __res_var=${lib_name^^}"_BASE"
@ -64,10 +61,10 @@ function get_lib_base()
}
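## Example: `get_lib_base snappy LATEST platform007` resolves the newest
## snappy build under $TP2_LATEST and records its path in SNAPPY_BASE.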
###########################################################
# platform010 dependencies #
# platform007 dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies_platform010.sh"
OUTPUT="$BASEDIR/dependencies_platform007.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
@ -75,42 +72,40 @@ touch "$OUTPUT"
echo "Writing dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/11.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/12/platform010/*/`
GCC_BASE=`readlink -f $TP2_LATEST/gcc/7.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 11.x platform010
get_lib_base glibc 2.34 platform010
get_lib_base snappy LATEST platform010
get_lib_base zlib LATEST platform010
get_lib_base bzip2 LATEST platform010
get_lib_base lz4 LATEST platform010
get_lib_base zstd LATEST platform010
get_lib_base gflags LATEST platform010
get_lib_base jemalloc LATEST platform010
get_lib_base numa LATEST platform010
get_lib_base libunwind LATEST platform010
get_lib_base tbb 2018_U5 platform010
get_lib_base liburing LATEST platform010
get_lib_base benchmark LATEST platform010
get_lib_base libgcc 7.x platform007
get_lib_base glibc 2.26 platform007
get_lib_base snappy LATEST platform007
get_lib_base zlib LATEST platform007
get_lib_base bzip2 LATEST platform007
get_lib_base lz4 LATEST platform007
get_lib_base zstd LATEST platform007
get_lib_base gflags LATEST platform007
get_lib_base jemalloc LATEST platform007
get_lib_base numa LATEST platform007
get_lib_base libunwind LATEST platform007
get_lib_base tbb LATEST platform007
get_lib_base liburing LATEST platform007
get_lib_base kernel-headers fb platform010
get_lib_base kernel-headers fb platform007
get_lib_base binutils LATEST centos7-native
get_lib_base valgrind LATEST platform010
get_lib_base lua 5.3.4 platform010
get_lib_base valgrind LATEST platform007
get_lib_base lua 5.3.4 platform007
git diff $OUTPUT
###########################################################
# platform009 dependencies #
# 5.x dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies_platform009.sh"
OUTPUT="$BASEDIR/dependencies.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
@ -118,32 +113,70 @@ touch "$OUTPUT"
echo "Writing dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/9.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/9.0.0/platform009/*/`
GCC_BASE=`readlink -f $TP2_LATEST/gcc/5.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos7-native/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 9.x platform009
get_lib_base glibc 2.30 platform009
get_lib_base snappy LATEST platform009
get_lib_base zlib LATEST platform009
get_lib_base bzip2 LATEST platform009
get_lib_base lz4 LATEST platform009
get_lib_base zstd LATEST platform009
get_lib_base gflags LATEST platform009
get_lib_base jemalloc LATEST platform009
get_lib_base numa LATEST platform009
get_lib_base libunwind LATEST platform009
get_lib_base tbb 2018_U5 platform009
get_lib_base liburing LATEST platform009
get_lib_base benchmark LATEST platform009
get_lib_base libgcc 5.x gcc-5-glibc-2.23
get_lib_base glibc 2.23 gcc-5-glibc-2.23
get_lib_base snappy LATEST gcc-5-glibc-2.23
get_lib_base zlib LATEST gcc-5-glibc-2.23
get_lib_base bzip2 LATEST gcc-5-glibc-2.23
get_lib_base lz4 LATEST gcc-5-glibc-2.23
get_lib_base zstd LATEST gcc-5-glibc-2.23
get_lib_base gflags LATEST gcc-5-glibc-2.23
get_lib_base jemalloc LATEST gcc-5-glibc-2.23
get_lib_base numa LATEST gcc-5-glibc-2.23
get_lib_base libunwind LATEST gcc-5-glibc-2.23
get_lib_base tbb LATEST gcc-5-glibc-2.23
get_lib_base kernel-headers fb platform009
get_lib_base kernel-headers 4.0.9-36_fbk5_2933_gd092e3f gcc-5-glibc-2.23
get_lib_base binutils LATEST centos7-native
get_lib_base valgrind LATEST platform009
get_lib_base lua 5.3.4 platform009
get_lib_base valgrind LATEST gcc-5-glibc-2.23
get_lib_base lua 5.2.3 gcc-5-glibc-2.23
git diff $OUTPUT
###########################################################
# 4.8.1 dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies_4.8.1.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
echo "Writing 4.8.1 dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/4.8.1/centos6-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/stable/centos6-native/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 4.8.1 gcc-4.8.1-glibc-2.17
get_lib_base glibc 2.17 gcc-4.8.1-glibc-2.17
get_lib_base snappy LATEST gcc-4.8.1-glibc-2.17
get_lib_base zlib LATEST gcc-4.8.1-glibc-2.17
get_lib_base bzip2 LATEST gcc-4.8.1-glibc-2.17
get_lib_base lz4 LATEST gcc-4.8.1-glibc-2.17
get_lib_base zstd LATEST gcc-4.8.1-glibc-2.17
get_lib_base gflags LATEST gcc-4.8.1-glibc-2.17
get_lib_base jemalloc LATEST gcc-4.8.1-glibc-2.17
get_lib_base numa LATEST gcc-4.8.1-glibc-2.17
get_lib_base libunwind LATEST gcc-4.8.1-glibc-2.17
get_lib_base tbb 4.0_update2 gcc-4.8.1-glibc-2.17
get_lib_base kernel-headers LATEST gcc-4.8.1-glibc-2.17
get_lib_base binutils LATEST centos6-native
get_lib_base valgrind 3.8.1 gcc-4.8.1-glibc-2.17
get_lib_base lua 5.2.3 centos6-native
git diff $OUTPUT

30 cache/cache.cc vendored
View File

@ -10,9 +10,7 @@
#include "rocksdb/cache.h"
#include "cache/lru_cache.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/utilities/customizable_util.h"
#include "rocksdb/utilities/options_type.h"
#include "options/options_helper.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
@ -21,28 +19,24 @@ static std::unordered_map<std::string, OptionTypeInfo>
lru_cache_options_type_info = {
{"capacity",
{offsetof(struct LRUCacheOptions, capacity), OptionType::kSizeT,
OptionVerificationType::kNormal, OptionTypeFlags::kMutable}},
OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
offsetof(struct LRUCacheOptions, capacity)}},
{"num_shard_bits",
{offsetof(struct LRUCacheOptions, num_shard_bits), OptionType::kInt,
OptionVerificationType::kNormal, OptionTypeFlags::kMutable}},
OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
offsetof(struct LRUCacheOptions, num_shard_bits)}},
{"strict_capacity_limit",
{offsetof(struct LRUCacheOptions, strict_capacity_limit),
OptionType::kBoolean, OptionVerificationType::kNormal,
OptionTypeFlags::kMutable}},
OptionTypeFlags::kMutable,
offsetof(struct LRUCacheOptions, strict_capacity_limit)}},
{"high_pri_pool_ratio",
{offsetof(struct LRUCacheOptions, high_pri_pool_ratio),
OptionType::kDouble, OptionVerificationType::kNormal,
OptionTypeFlags::kMutable}},
};
OptionTypeFlags::kMutable,
offsetof(struct LRUCacheOptions, high_pri_pool_ratio)}}};
#endif // ROCKSDB_LITE
Status SecondaryCache::CreateFromString(
const ConfigOptions& config_options, const std::string& value,
std::shared_ptr<SecondaryCache>* result) {
return LoadSharedObject<SecondaryCache>(config_options, value, nullptr,
result);
}
Status Cache::CreateFromString(const ConfigOptions& config_options,
const std::string& value,
std::shared_ptr<Cache>* result) {
@ -53,9 +47,9 @@ Status Cache::CreateFromString(const ConfigOptions& config_options,
} else {
#ifndef ROCKSDB_LITE
LRUCacheOptions cache_opts;
status = OptionTypeInfo::ParseStruct(config_options, "",
&lru_cache_options_type_info, "",
value, &cache_opts);
status = OptionTypeInfo::ParseStruct(
config_options, "", &lru_cache_options_type_info, "", value,
reinterpret_cast<char*>(&cache_opts));
if (status.ok()) {
cache = NewLRUCache(cache_opts);
}

377 cache/cache_bench.cc vendored
View File

@ -1,11 +1,8 @@
// Copyright (c) 2013-present, Facebook, Inc. All rights reserved.
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef GFLAGS
#include <cstdio>
int main() {
@ -13,8 +10,372 @@ int main() {
return 1;
}
#else
#include "rocksdb/cache_bench_tool.h"
int main(int argc, char** argv) {
return ROCKSDB_NAMESPACE::cache_bench_tool(argc, argv);
#include <stdio.h>
#include <sys/types.h>
#include <cinttypes>
#include <limits>
#include "port/port.h"
#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "util/coding.h"
#include "util/gflags_compat.h"
#include "util/hash.h"
#include "util/mutexlock.h"
#include "util/random.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
static constexpr uint32_t KiB = uint32_t{1} << 10;
static constexpr uint32_t MiB = KiB << 10;
static constexpr uint64_t GiB = MiB << 10;
DEFINE_uint32(threads, 16, "Number of concurrent threads to run.");
DEFINE_uint64(cache_size, 1 * GiB,
"Number of bytes to use as a cache of uncompressed data.");
DEFINE_uint32(num_shard_bits, 6, "shard_bits.");
DEFINE_double(resident_ratio, 0.25,
"Ratio of keys fitting in cache to keyspace.");
DEFINE_uint64(ops_per_thread, 0,
"Number of operations per thread. (Default: 5 * keyspace size)");
DEFINE_uint32(value_bytes, 8 * KiB, "Size of each value added.");
DEFINE_uint32(skew, 5, "Degree of skew in key selection");
DEFINE_bool(populate_cache, true, "Populate cache before operations");
DEFINE_uint32(lookup_insert_percent, 87,
"Ratio of lookup (+ insert on not found) to total workload "
"(expressed as a percentage)");
DEFINE_uint32(insert_percent, 2,
"Ratio of insert to total workload (expressed as a percentage)");
DEFINE_uint32(lookup_percent, 10,
"Ratio of lookup to total workload (expressed as a percentage)");
DEFINE_uint32(erase_percent, 1,
"Ratio of erase to total workload (expressed as a percentage)");
DEFINE_bool(use_clock_cache, false, "");
namespace ROCKSDB_NAMESPACE {
class CacheBench;
namespace {
// State shared by all concurrent executions of the same benchmark.
class SharedState {
public:
explicit SharedState(CacheBench* cache_bench)
: cv_(&mu_),
num_initialized_(0),
start_(false),
num_done_(0),
cache_bench_(cache_bench) {}
~SharedState() {}
port::Mutex* GetMutex() {
return &mu_;
}
port::CondVar* GetCondVar() {
return &cv_;
}
CacheBench* GetCacheBench() const {
return cache_bench_;
}
void IncInitialized() {
num_initialized_++;
}
void IncDone() {
num_done_++;
}
bool AllInitialized() const { return num_initialized_ >= FLAGS_threads; }
bool AllDone() const { return num_done_ >= FLAGS_threads; }
void SetStart() {
start_ = true;
}
bool Started() const {
return start_;
}
private:
port::Mutex mu_;
port::CondVar cv_;
uint64_t num_initialized_;
bool start_;
uint64_t num_done_;
CacheBench* cache_bench_;
};
// Per-thread state for concurrent executions of the same benchmark.
struct ThreadState {
uint32_t tid;
Random64 rnd;
SharedState* shared;
ThreadState(uint32_t index, SharedState* _shared)
: tid(index), rnd(1000 + index), shared(_shared) {}
};
struct KeyGen {
char key_data[27];
Slice GetRand(Random64& rnd, uint64_t max_key) {
uint64_t raw = rnd.Next();
// Skew according to setting
for (uint32_t i = 0; i < FLAGS_skew; ++i) {
raw = std::min(raw, rnd.Next());
}
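// Taking the minimum of FLAGS_skew + 1 uniform draws biases `raw` toward
// small values, concentrating most traffic on a hot subset of the keyspace.
// fastrange64 then maps the 64-bit value into [0, max_key) with a
// multiply-and-shift instead of a modulo.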
uint64_t key = fastrange64(raw, max_key);
// Variable size and alignment
size_t off = key % 8;
key_data[0] = char{42};
EncodeFixed64(key_data + 1, key);
key_data[9] = char{11};
EncodeFixed64(key_data + 10, key);
key_data[18] = char{4};
EncodeFixed64(key_data + 19, key);
return Slice(&key_data[off], sizeof(key_data) - off);
}
};
char* createValue(Random64& rnd) {
char* rv = new char[FLAGS_value_bytes];
// Fill with some filler data, and take some CPU time
for (uint32_t i = 0; i < FLAGS_value_bytes; i += 8) {
EncodeFixed64(rv + i, rnd.Next());
}
return rv;
}
void deleter(const Slice& /*key*/, void* value) {
delete[] static_cast<char*>(value);
}
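// The deleter is passed to Cache::Insert() below; the cache invokes it when
// an entry is evicted or erased, freeing the heap-allocated value.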
} // namespace
class CacheBench {
static constexpr uint64_t kHundredthUint64 =
std::numeric_limits<uint64_t>::max() / 100U;
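// kHundredthUint64 is 1% of the uint64_t range. Scaling each percentage flag
// by it yields cumulative thresholds, so comparing one uniformly random
// uint64_t against them in OperateCache() picks an operation with the
// configured probability.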
public:
CacheBench()
: max_key_(static_cast<uint64_t>(FLAGS_cache_size / FLAGS_resident_ratio /
FLAGS_value_bytes)),
lookup_insert_threshold_(kHundredthUint64 *
FLAGS_lookup_insert_percent),
insert_threshold_(lookup_insert_threshold_ +
kHundredthUint64 * FLAGS_insert_percent),
lookup_threshold_(insert_threshold_ +
kHundredthUint64 * FLAGS_lookup_percent),
erase_threshold_(lookup_threshold_ +
kHundredthUint64 * FLAGS_erase_percent) {
if (erase_threshold_ != 100U * kHundredthUint64) {
fprintf(stderr, "Percentages must add to 100.\n");
exit(1);
}
if (FLAGS_use_clock_cache) {
cache_ = NewClockCache(FLAGS_cache_size, FLAGS_num_shard_bits);
if (!cache_) {
fprintf(stderr, "Clock cache not supported.\n");
exit(1);
}
} else {
cache_ = NewLRUCache(FLAGS_cache_size, FLAGS_num_shard_bits);
}
if (FLAGS_ops_per_thread == 0) {
FLAGS_ops_per_thread = 5 * max_key_;
}
}
~CacheBench() {}
void PopulateCache() {
Random64 rnd(1);
KeyGen keygen;
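// Inserting roughly 2x the cache's byte capacity guarantees the cache is
// filled to its limit and already evicting before the timed phase begins.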
for (uint64_t i = 0; i < 2 * FLAGS_cache_size; i += FLAGS_value_bytes) {
cache_->Insert(keygen.GetRand(rnd, max_key_), createValue(rnd),
FLAGS_value_bytes, &deleter);
}
}
bool Run() {
ROCKSDB_NAMESPACE::Env* env = ROCKSDB_NAMESPACE::Env::Default();
PrintEnv();
SharedState shared(this);
std::vector<std::unique_ptr<ThreadState> > threads(FLAGS_threads);
for (uint32_t i = 0; i < FLAGS_threads; i++) {
threads[i].reset(new ThreadState(i, &shared));
env->StartThread(ThreadBody, threads[i].get());
}
{
MutexLock l(shared.GetMutex());
while (!shared.AllInitialized()) {
shared.GetCondVar()->Wait();
}
// Record start time
uint64_t start_time = env->NowMicros();
// Start all threads
shared.SetStart();
shared.GetCondVar()->SignalAll();
// Wait for threads to complete
while (!shared.AllDone()) {
shared.GetCondVar()->Wait();
}
// Record end time
uint64_t end_time = env->NowMicros();
double elapsed = static_cast<double>(end_time - start_time) * 1e-6;
uint32_t qps = static_cast<uint32_t>(
static_cast<double>(FLAGS_threads * FLAGS_ops_per_thread) / elapsed);
fprintf(stdout, "Complete in %.3f s; QPS = %u\n", elapsed, qps);
}
return true;
}
private:
std::shared_ptr<Cache> cache_;
const uint64_t max_key_;
// Cumulative thresholds in the space of a random uint64_t
const uint64_t lookup_insert_threshold_;
const uint64_t insert_threshold_;
const uint64_t lookup_threshold_;
const uint64_t erase_threshold_;
static void ThreadBody(void* v) {
ThreadState* thread = static_cast<ThreadState*>(v);
SharedState* shared = thread->shared;
{
MutexLock l(shared->GetMutex());
shared->IncInitialized();
if (shared->AllInitialized()) {
shared->GetCondVar()->SignalAll();
}
while (!shared->Started()) {
shared->GetCondVar()->Wait();
}
}
thread->shared->GetCacheBench()->OperateCache(thread);
{
MutexLock l(shared->GetMutex());
shared->IncDone();
if (shared->AllDone()) {
shared->GetCondVar()->SignalAll();
}
}
}
void OperateCache(ThreadState* thread) {
// To use looked-up values
uint64_t result = 0;
// To hold handles for a non-trivial amount of time
Cache::Handle* handle = nullptr;
KeyGen gen;
for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) {
Slice key = gen.GetRand(thread->rnd, max_key_);
uint64_t random_op = thread->rnd.Next();
if (random_op < lookup_insert_threshold_) {
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// do lookup
handle = cache_->Lookup(key);
if (handle) {
// do something with the data
result += NPHash64(static_cast<char*>(cache_->Value(handle)),
FLAGS_value_bytes);
} else {
// do insert
cache_->Insert(key, createValue(thread->rnd), FLAGS_value_bytes,
&deleter, &handle);
}
} else if (random_op < insert_threshold_) {
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// do insert
cache_->Insert(key, createValue(thread->rnd), FLAGS_value_bytes,
&deleter, &handle);
} else if (random_op < lookup_threshold_) {
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// do lookup
handle = cache_->Lookup(key);
if (handle) {
// do something with the data
result += NPHash64(static_cast<char*>(cache_->Value(handle)),
FLAGS_value_bytes);
}
} else if (random_op < erase_threshold_) {
// do erase
cache_->Erase(key);
} else {
// Should be extremely unlikely (noop)
assert(random_op >= kHundredthUint64 * 100U);
}
}
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
}
void PrintEnv() const {
printf("RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion);
printf("Number of threads : %u\n", FLAGS_threads);
printf("Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread);
printf("Cache size : %" PRIu64 "\n", FLAGS_cache_size);
printf("Num shard bits : %u\n", FLAGS_num_shard_bits);
printf("Max key : %" PRIu64 "\n", max_key_);
printf("Resident ratio : %g\n", FLAGS_resident_ratio);
printf("Skew degree : %u\n", FLAGS_skew);
printf("Populate cache : %d\n", int{FLAGS_populate_cache});
printf("Lookup+Insert pct : %u%%\n", FLAGS_lookup_insert_percent);
printf("Insert percentage : %u%%\n", FLAGS_insert_percent);
printf("Lookup percentage : %u%%\n", FLAGS_lookup_percent);
printf("Erase percentage : %u%%\n", FLAGS_erase_percent);
printf("----------------------------\n");
}
};
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_threads <= 0) {
fprintf(stderr, "threads number <= 0\n");
exit(1);
}
ROCKSDB_NAMESPACE::CacheBench bench;
if (FLAGS_populate_cache) {
bench.PopulateCache();
printf("Population complete\n");
printf("----------------------------\n");
}
if (bench.Run()) {
return 0;
} else {
return 1;
}
}
#endif // GFLAGS


@ -1,924 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#ifdef GFLAGS
#include <cinttypes>
#include <cstddef>
#include <cstdio>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include "db/db_impl/db_impl.h"
#include "monitoring/histogram.h"
#include "port/port.h"
#include "rocksdb/cache.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table_properties.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/cachable_entry.h"
#include "util/coding.h"
#include "util/gflags_compat.h"
#include "util/hash.h"
#include "util/mutexlock.h"
#include "util/random.h"
#include "util/stop_watch.h"
#include "util/string_util.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
static constexpr uint32_t KiB = uint32_t{1} << 10;
static constexpr uint32_t MiB = KiB << 10;
static constexpr uint64_t GiB = MiB << 10;
DEFINE_uint32(threads, 16, "Number of concurrent threads to run.");
DEFINE_uint64(cache_size, 1 * GiB,
"Number of bytes to use as a cache of uncompressed data.");
DEFINE_uint32(num_shard_bits, 6, "shard_bits.");
DEFINE_double(resident_ratio, 0.25,
"Ratio of keys fitting in cache to keyspace.");
DEFINE_uint64(ops_per_thread, 2000000U, "Number of operations per thread.");
DEFINE_uint32(value_bytes, 8 * KiB, "Size of each value added.");
DEFINE_uint32(skew, 5, "Degree of skew in key selection");
DEFINE_bool(populate_cache, true, "Populate cache before operations");
DEFINE_uint32(lookup_insert_percent, 87,
"Ratio of lookup (+ insert on not found) to total workload "
"(expressed as a percentage)");
DEFINE_uint32(insert_percent, 2,
"Ratio of insert to total workload (expressed as a percentage)");
DEFINE_uint32(lookup_percent, 10,
"Ratio of lookup to total workload (expressed as a percentage)");
DEFINE_uint32(erase_percent, 1,
"Ratio of erase to total workload (expressed as a percentage)");
DEFINE_bool(gather_stats, false,
"Whether to periodically simulate gathering block cache stats, "
"using one more thread.");
DEFINE_uint32(
gather_stats_sleep_ms, 1000,
"How many milliseconds to sleep between each gathering of stats.");
DEFINE_uint32(gather_stats_entries_per_lock, 256,
"For Cache::ApplyToAllEntries");
DEFINE_bool(skewed, false, "If true, skew the key access distribution");
#ifndef ROCKSDB_LITE
DEFINE_string(secondary_cache_uri, "",
"Full URI for creating a custom secondary cache object");
static class std::shared_ptr<ROCKSDB_NAMESPACE::SecondaryCache> secondary_cache;
#endif // ROCKSDB_LITE
DEFINE_bool(use_clock_cache, false, "");
// ## BEGIN stress_cache_key sub-tool options ##
// See class StressCacheKey below.
DEFINE_bool(stress_cache_key, false,
"If true, run cache key stress test instead");
DEFINE_uint32(
sck_files_per_day, 2500000,
"(-stress_cache_key) Simulated files generated per simulated day");
// NOTE: Giving each run a specified lifetime, rather than e.g. "until
// first collision", ensures equal skew from start-up, when collisions are
// less likely.
DEFINE_uint32(sck_days_per_run, 90,
"(-stress_cache_key) Number of days to simulate in each run");
// NOTE: The number of observed collisions directly affects the relative
// accuracy of the predicted probabilities. 15 observations should be well
// within factor-of-2 accuracy.
DEFINE_uint32(
sck_min_collision, 15,
"(-stress_cache_key) Keep running until this many collisions seen");
// sck_file_size_mb can be thought of as average file size. The simulation is
// not precise enough to care about the distribution of file sizes; other
// simulations (https://github.com/pdillinger/unique_id/tree/main/monte_carlo)
// indicate the distribution only makes a small difference (e.g. < 2x factor)
DEFINE_uint32(
sck_file_size_mb, 32,
"(-stress_cache_key) Simulated file size in MiB, for accounting purposes");
DEFINE_uint32(sck_reopen_nfiles, 100,
"(-stress_cache_key) Simulate DB re-open average every n files");
DEFINE_uint32(sck_restarts_per_day, 24,
"(-stress_cache_key) Average simulated process restarts per day "
"(across DBs)");
DEFINE_uint32(
sck_db_count, 100,
"(-stress_cache_key) Parallel DBs in simulation sharing a block cache");
DEFINE_uint32(
sck_table_bits, 20,
"(-stress_cache_key) Log2 number of tracked (live) files (across DBs)");
// sck_keep_bits being well below full 128 bits amplifies the collision
// probability so that the true probability can be estimated through observed
// collisions. (More explanation below.)
DEFINE_uint32(
sck_keep_bits, 50,
"(-stress_cache_key) Number of bits to keep from each cache key (<= 64)");
// sck_randomize is used to validate whether the cache key scheme performs
// "better than random." Even with this setting, file offsets are not
// randomized.
DEFINE_bool(sck_randomize, false,
"(-stress_cache_key) Randomize (hash) cache key");
// See https://github.com/facebook/rocksdb/pull/9058
DEFINE_bool(sck_footer_unique_id, false,
"(-stress_cache_key) Simulate using proposed footer unique id");
// ## END stress_cache_key sub-tool options ##
namespace ROCKSDB_NAMESPACE {
class CacheBench;
namespace {
// State shared by all concurrent executions of the same benchmark.
class SharedState {
public:
explicit SharedState(CacheBench* cache_bench)
: cv_(&mu_),
num_initialized_(0),
start_(false),
num_done_(0),
cache_bench_(cache_bench) {}
~SharedState() {}
port::Mutex* GetMutex() { return &mu_; }
port::CondVar* GetCondVar() { return &cv_; }
CacheBench* GetCacheBench() const { return cache_bench_; }
void IncInitialized() { num_initialized_++; }
void IncDone() { num_done_++; }
bool AllInitialized() const { return num_initialized_ >= FLAGS_threads; }
bool AllDone() const { return num_done_ >= FLAGS_threads; }
void SetStart() { start_ = true; }
bool Started() const { return start_; }
private:
port::Mutex mu_;
port::CondVar cv_;
uint64_t num_initialized_;
bool start_;
uint64_t num_done_;
CacheBench* cache_bench_;
};
// Per-thread state for concurrent executions of the same benchmark.
struct ThreadState {
uint32_t tid;
Random64 rnd;
SharedState* shared;
HistogramImpl latency_ns_hist;
uint64_t duration_us = 0;
ThreadState(uint32_t index, SharedState* _shared)
: tid(index), rnd(1000 + index), shared(_shared) {}
};
struct KeyGen {
char key_data[27];
Slice GetRand(Random64& rnd, uint64_t max_key, int max_log) {
uint64_t key = 0;
if (!FLAGS_skewed) {
uint64_t raw = rnd.Next();
// Skew according to setting
for (uint32_t i = 0; i < FLAGS_skew; ++i) {
raw = std::min(raw, rnd.Next());
}
key = FastRange64(raw, max_key);
} else {
key = rnd.Skewed(max_log);
if (key > max_key) {
key -= max_key;
}
}
// Variable size and alignment
size_t off = key % 8;
key_data[0] = char{42};
EncodeFixed64(key_data + 1, key);
key_data[9] = char{11};
EncodeFixed64(key_data + 10, key);
key_data[18] = char{4};
EncodeFixed64(key_data + 19, key);
return Slice(&key_data[off], sizeof(key_data) - off);
}
};
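// Note on the skew mechanism in GetRand (informal derivation, not from
// the original source): taking the minimum of FLAGS_skew + 1 independent
// uniform draws yields P(min <= x) = 1 - (1 - x)^(FLAGS_skew + 1) on the
// normalized key range, so the distribution concentrates near zero and
// key selection is biased toward a small "hot" subset of the keyspace.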
char* createValue(Random64& rnd) {
char* rv = new char[FLAGS_value_bytes];
// Fill with some filler data, and take some CPU time
for (uint32_t i = 0; i < FLAGS_value_bytes; i += 8) {
EncodeFixed64(rv + i, rnd.Next());
}
return rv;
}
// Callbacks for secondary cache
size_t SizeFn(void* /*obj*/) { return FLAGS_value_bytes; }
Status SaveToFn(void* obj, size_t /*offset*/, size_t size, void* out) {
memcpy(out, obj, size);
return Status::OK();
}
// Different deleters to simulate using deleter to gather
// stats on the code origin and kind of cache entries.
void deleter1(const Slice& /*key*/, void* value) {
delete[] static_cast<char*>(value);
}
void deleter2(const Slice& /*key*/, void* value) {
delete[] static_cast<char*>(value);
}
void deleter3(const Slice& /*key*/, void* value) {
delete[] static_cast<char*>(value);
}
Cache::CacheItemHelper helper1(SizeFn, SaveToFn, deleter1);
Cache::CacheItemHelper helper2(SizeFn, SaveToFn, deleter2);
Cache::CacheItemHelper helper3(SizeFn, SaveToFn, deleter3);
} // namespace
class CacheBench {
static constexpr uint64_t kHundredthUint64 =
std::numeric_limits<uint64_t>::max() / 100U;
public:
CacheBench()
: max_key_(static_cast<uint64_t>(FLAGS_cache_size / FLAGS_resident_ratio /
FLAGS_value_bytes)),
lookup_insert_threshold_(kHundredthUint64 *
FLAGS_lookup_insert_percent),
insert_threshold_(lookup_insert_threshold_ +
kHundredthUint64 * FLAGS_insert_percent),
lookup_threshold_(insert_threshold_ +
kHundredthUint64 * FLAGS_lookup_percent),
erase_threshold_(lookup_threshold_ +
kHundredthUint64 * FLAGS_erase_percent),
skewed_(FLAGS_skewed) {
if (erase_threshold_ != 100U * kHundredthUint64) {
fprintf(stderr, "Percentages must add to 100.\n");
exit(1);
}
max_log_ = 0;
if (skewed_) {
uint64_t max_key = max_key_;
while (max_key >>= 1) max_log_++;
if (max_key > (static_cast<uint64_t>(1) << max_log_)) max_log_++;
}
if (FLAGS_use_clock_cache) {
cache_ = NewClockCache(FLAGS_cache_size, FLAGS_num_shard_bits);
if (!cache_) {
fprintf(stderr, "Clock cache not supported.\n");
exit(1);
}
} else {
LRUCacheOptions opts(FLAGS_cache_size, FLAGS_num_shard_bits, false, 0.5);
#ifndef ROCKSDB_LITE
if (!FLAGS_secondary_cache_uri.empty()) {
Status s = SecondaryCache::CreateFromString(
ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache);
if (secondary_cache == nullptr) {
fprintf(
stderr,
"No secondary cache registered matching string: %s status=%s\n",
FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str());
exit(1);
}
opts.secondary_cache = secondary_cache;
}
#endif // ROCKSDB_LITE
cache_ = NewLRUCache(opts);
}
}
~CacheBench() {}
void PopulateCache() {
Random64 rnd(1);
KeyGen keygen;
for (uint64_t i = 0; i < 2 * FLAGS_cache_size; i += FLAGS_value_bytes) {
cache_->Insert(keygen.GetRand(rnd, max_key_, max_log_), createValue(rnd),
&helper1, FLAGS_value_bytes);
}
}
bool Run() {
const auto clock = SystemClock::Default().get();
PrintEnv();
SharedState shared(this);
std::vector<std::unique_ptr<ThreadState> > threads(FLAGS_threads);
for (uint32_t i = 0; i < FLAGS_threads; i++) {
threads[i].reset(new ThreadState(i, &shared));
std::thread(ThreadBody, threads[i].get()).detach();
}
HistogramImpl stats_hist;
std::string stats_report;
std::thread stats_thread(StatsBody, &shared, &stats_hist, &stats_report);
uint64_t start_time;
{
MutexLock l(shared.GetMutex());
while (!shared.AllInitialized()) {
shared.GetCondVar()->Wait();
}
// Record start time
start_time = clock->NowMicros();
// Start all threads
shared.SetStart();
shared.GetCondVar()->SignalAll();
// Wait threads to complete
while (!shared.AllDone()) {
shared.GetCondVar()->Wait();
}
}
// Stats gathering is considered background work. This time measurement
// is for foreground work, and not really ideal for that. See below.
uint64_t end_time = clock->NowMicros();
stats_thread.join();
// Wall clock time - includes idle time if threads
// finish at different times (not ideal).
double elapsed_secs = static_cast<double>(end_time - start_time) * 1e-6;
uint32_t ops_per_sec = static_cast<uint32_t>(
1.0 * FLAGS_threads * FLAGS_ops_per_thread / elapsed_secs);
printf("Complete in %.3f s; Rough parallel ops/sec = %u\n", elapsed_secs,
ops_per_sec);
// Total time in each thread (more accurate throughput measure)
elapsed_secs = 0;
for (uint32_t i = 0; i < FLAGS_threads; i++) {
elapsed_secs += threads[i]->duration_us * 1e-6;
}
ops_per_sec = static_cast<uint32_t>(1.0 * FLAGS_threads *
FLAGS_ops_per_thread / elapsed_secs);
printf("Thread ops/sec = %u\n", ops_per_sec);
printf("\nOperation latency (ns):\n");
HistogramImpl combined;
for (uint32_t i = 0; i < FLAGS_threads; i++) {
combined.Merge(threads[i]->latency_ns_hist);
}
printf("%s", combined.ToString().c_str());
if (FLAGS_gather_stats) {
printf("\nGather stats latency (us):\n");
printf("%s", stats_hist.ToString().c_str());
}
printf("\n%s", stats_report.c_str());
return true;
}
private:
std::shared_ptr<Cache> cache_;
const uint64_t max_key_;
// Cumulative thresholds in the space of a random uint64_t
const uint64_t lookup_insert_threshold_;
const uint64_t insert_threshold_;
const uint64_t lookup_threshold_;
const uint64_t erase_threshold_;
const bool skewed_;
int max_log_;
// A benchmark version of gathering stats on an active block cache by
// iterating over it. The primary purpose is to measure the impact of
// gathering stats with ApplyToAllEntries on throughput- and
// latency-sensitive Cache users. Performance of stats gathering is
// also reported. The last set of gathered stats is also reported, for
// manual sanity checking for logical errors or other unexpected
// behavior of cache_bench or the underlying Cache.
static void StatsBody(SharedState* shared, HistogramImpl* stats_hist,
std::string* stats_report) {
if (!FLAGS_gather_stats) {
return;
}
const auto clock = SystemClock::Default().get();
uint64_t total_key_size = 0;
uint64_t total_charge = 0;
uint64_t total_entry_count = 0;
std::set<Cache::DeleterFn> deleters;
StopWatchNano timer(clock);
for (;;) {
uint64_t time;
time = clock->NowMicros();
uint64_t deadline = time + uint64_t{FLAGS_gather_stats_sleep_ms} * 1000;
{
MutexLock l(shared->GetMutex());
for (;;) {
if (shared->AllDone()) {
std::ostringstream ostr;
ostr << "Most recent cache entry stats:\n"
<< "Number of entries: " << total_entry_count << "\n"
<< "Total charge: " << BytesToHumanString(total_charge) << "\n"
<< "Average key size: "
<< (1.0 * total_key_size / total_entry_count) << "\n"
<< "Average charge: "
<< BytesToHumanString(static_cast<uint64_t>(
1.0 * total_charge / total_entry_count))
<< "\n"
<< "Unique deleters: " << deleters.size() << "\n";
*stats_report = ostr.str();
return;
}
if (clock->NowMicros() >= deadline) {
break;
}
uint64_t diff = deadline - std::min(clock->NowMicros(), deadline);
shared->GetCondVar()->TimedWait(diff + 1);
}
}
// Now gather stats, outside of mutex
total_key_size = 0;
total_charge = 0;
total_entry_count = 0;
deleters.clear();
auto fn = [&](const Slice& key, void* /*value*/, size_t charge,
Cache::DeleterFn deleter) {
total_key_size += key.size();
total_charge += charge;
++total_entry_count;
// Something slightly more expensive as in (future) stats by category
deleters.insert(deleter);
};
timer.Start();
Cache::ApplyToAllEntriesOptions opts;
opts.average_entries_per_lock = FLAGS_gather_stats_entries_per_lock;
shared->GetCacheBench()->cache_->ApplyToAllEntries(fn, opts);
stats_hist->Add(timer.ElapsedNanos() / 1000);
}
}
static void ThreadBody(ThreadState* thread) {
SharedState* shared = thread->shared;
{
MutexLock l(shared->GetMutex());
shared->IncInitialized();
if (shared->AllInitialized()) {
shared->GetCondVar()->SignalAll();
}
while (!shared->Started()) {
shared->GetCondVar()->Wait();
}
}
thread->shared->GetCacheBench()->OperateCache(thread);
{
MutexLock l(shared->GetMutex());
shared->IncDone();
if (shared->AllDone()) {
shared->GetCondVar()->SignalAll();
}
}
}
void OperateCache(ThreadState* thread) {
// To use looked-up values
uint64_t result = 0;
// To hold handles for a non-trivial amount of time
Cache::Handle* handle = nullptr;
KeyGen gen;
const auto clock = SystemClock::Default().get();
uint64_t start_time = clock->NowMicros();
StopWatchNano timer(clock);
for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) {
timer.Start();
Slice key = gen.GetRand(thread->rnd, max_key_, max_log_);
uint64_t random_op = thread->rnd.Next();
Cache::CreateCallback create_cb = [](const void* buf, size_t size,
void** out_obj,
size_t* charge) -> Status {
*out_obj = reinterpret_cast<void*>(new char[size]);
memcpy(*out_obj, buf, size);
*charge = size;
return Status::OK();
};
if (random_op < lookup_insert_threshold_) {
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// do lookup
handle = cache_->Lookup(key, &helper2, create_cb, Cache::Priority::LOW,
true);
if (handle) {
// do something with the data
result += NPHash64(static_cast<char*>(cache_->Value(handle)),
FLAGS_value_bytes);
} else {
// do insert
cache_->Insert(key, createValue(thread->rnd), &helper2,
FLAGS_value_bytes, &handle);
}
} else if (random_op < insert_threshold_) {
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// do insert
cache_->Insert(key, createValue(thread->rnd), &helper3,
FLAGS_value_bytes, &handle);
} else if (random_op < lookup_threshold_) {
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// do lookup
handle = cache_->Lookup(key, &helper2, create_cb, Cache::Priority::LOW,
true);
if (handle) {
// do something with the data
result += NPHash64(static_cast<char*>(cache_->Value(handle)),
FLAGS_value_bytes);
}
} else if (random_op < erase_threshold_) {
// do erase
cache_->Erase(key);
} else {
// Should be extremely unlikely (noop)
assert(random_op >= kHundredthUint64 * 100U);
}
thread->latency_ns_hist.Add(timer.ElapsedNanos());
}
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// Ensure computations on `result` are not optimized away.
if (result == 1) {
printf("You are extremely unlucky(2). Try again.\n");
exit(1);
}
thread->duration_us = clock->NowMicros() - start_time;
}
void PrintEnv() const {
printf("RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion);
printf("Number of threads : %u\n", FLAGS_threads);
printf("Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread);
printf("Cache size : %s\n",
BytesToHumanString(FLAGS_cache_size).c_str());
printf("Num shard bits : %u\n", FLAGS_num_shard_bits);
printf("Max key : %" PRIu64 "\n", max_key_);
printf("Resident ratio : %g\n", FLAGS_resident_ratio);
printf("Skew degree : %u\n", FLAGS_skew);
printf("Populate cache : %d\n", int{FLAGS_populate_cache});
printf("Lookup+Insert pct : %u%%\n", FLAGS_lookup_insert_percent);
printf("Insert percentage : %u%%\n", FLAGS_insert_percent);
printf("Lookup percentage : %u%%\n", FLAGS_lookup_percent);
printf("Erase percentage : %u%%\n", FLAGS_erase_percent);
std::ostringstream stats;
if (FLAGS_gather_stats) {
stats << "enabled (" << FLAGS_gather_stats_sleep_ms << "ms, "
<< FLAGS_gather_stats_entries_per_lock << "/lock)";
} else {
stats << "disabled";
}
printf("Gather stats : %s\n", stats.str().c_str());
printf("----------------------------\n");
}
};
// cache_bench -stress_cache_key is an independent embedded tool for
// estimating the probability of CacheKey collisions through simulation.
// At a high level, it simulates generating SST files over many months,
// keeping them in the DB and/or cache for some lifetime while staying
// under resource caps, and checking for any cache key collisions that
// arise among the set of live files. For efficient simulation, we make
// some simplifying "pessimistic" assumptions (that only increase the
// chance of the simulation reporting a collision relative to the chance
// of collision in practice):
// * Every generated file has a cache entry for every byte offset in the
// file (contiguous range of cache keys)
// * All of every file is cached for its entire lifetime. (Here "lifetime"
// is technically the union of DB and Cache lifetime, though we only
// model a generous DB lifetime, where space usage is always maximized.
// In an effective Cache, lifetime in cache can only substantially exceed
// lifetime in DB if there is little cache activity; cache activity is
// required to hit cache key collisions.)
//
// It would be possible to track an exact set of cache key ranges for the
// set of live files, but we would have no hope of observing collisions
// (overlap in live files) in our simulation. We need to employ some way
// of amplifying collision probability that allows us to predict the real
// collision probability by extrapolation from observed collisions. Our
// basic approach is to reduce each cache key range down to some smaller
// number of bits, limited to bits that are shared over the whole
// range. Now we can observe collisions using a set of smaller stripped-down
// (reduced) cache keys. Let's do some case analysis to understand why this
// works:
// * No collision in reduced key - because the reduction is a pure function
// this implies no collision in the full keys
// * Collision detected between two reduced keys - either
// * The reduction has dropped some structured uniqueness info (from one of
// session counter or file number; file offsets are never materialized here).
// This can only artificially inflate the observed and extrapolated collision
// probabilities. We only have to worry about this in designing the reduction.
// * The reduction has preserved all the structured uniqueness in the cache
// key, which means either
// * REJECTED: We have a uniqueness bug in generating cache keys, where
// structured uniqueness info should have been different but isn't. In such a
// case, increasing by 1 the number of bits kept after reduction would not
// reduce observed probabilities by half. (In our observations, the
// probabilities are reduced approximately by half.)
// * ACCEPTED: The lost unstructured uniqueness in the key determines the
// probability that an observed collision would imply an overlap in ranges.
// In short, dropping n bits from key would increase collision probability by
// 2**n, assuming those n bits have full entropy in unstructured uniqueness.
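// For a concrete instance of that extrapolation (informal, using default
// flags): sck_keep_bits=50 drops 128 - 50 = 78 bits, so the observed
// collision rate should overstate the true rate by about 2**78, before
// the file-size adjustment discussed next.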
//
// But we also have to account for the key ranges based on file size. If file
// sizes are roughly 2**b offsets, using XOR in 128-bit cache keys for
// "ranges", we know from other simulations (see
// https://github.com/pdillinger/unique_id/) that that's roughly equivalent to
// (less than 2x higher collision probability) using a cache key of size
// 128 - b bits for the whole file. (This is the only place we make an
// "optimistic" assumption, which is more than offset by the real
// implementation stripping off 2 lower bits from block byte offsets for cache
// keys. The simulation assumes byte offsets, which is net pessimistic.)
//
// So to accept the extrapolation as valid, we need to be confident that all
// "lost" bits, excluding those covered by file offset, are full entropy.
// Recall that we have assumed (verifiably, safely) that other structured data
// (file number and session counter) are kept, not lost. Based on the
// implementation comments for OffsetableCacheKey, the only potential hole here
// is that we only have ~103 bits of entropy in "all new" session IDs, and in
// extreme cases, there might be only 1 DB ID. However, because the upper ~39
// bits of session ID are hashed, the combination of file number and file
// offset only has to add to 25 bits (or more) to ensure full entropy in
// unstructured uniqueness lost in the reduction. Typical file size of 32MB
// suffices (at least for simulation purposes where we assume each file offset
// occupies a cache key).
//
// Example results in comments on OffsetableCacheKey.
class StressCacheKey {
public:
void Run() {
if (FLAGS_sck_footer_unique_id) {
// Proposed footer unique IDs are DB-independent and session-independent
// (but process-dependent) which is most easily simulated here by
// assuming 1 DB and (later below) no session resets without process
// reset.
FLAGS_sck_db_count = 1;
}
// Describe the simulated workload
uint64_t mb_per_day =
uint64_t{FLAGS_sck_files_per_day} * FLAGS_sck_file_size_mb;
printf("Total cache or DBs size: %gTiB Writing %g MiB/s or %gTiB/day\n",
FLAGS_sck_file_size_mb / 1024.0 / 1024.0 *
std::pow(2.0, FLAGS_sck_table_bits),
mb_per_day / 86400.0, mb_per_day / 1024.0 / 1024.0);
// For extrapolating probability of any collisions from a number of
// observed collisions
multiplier_ = std::pow(2.0, 128 - FLAGS_sck_keep_bits) /
(FLAGS_sck_file_size_mb * 1024.0 * 1024.0);
printf(
"Multiply by %g to correct for simulation losses (but still assume "
"whole file cached)\n",
multiplier_);
restart_nfiles_ = FLAGS_sck_files_per_day / FLAGS_sck_restarts_per_day;
double without_ejection =
std::pow(1.414214, FLAGS_sck_keep_bits) / FLAGS_sck_files_per_day;
// This should be a lower bound for -sck_randomize, usually a terribly
// rough lower bound.
// If observation is worse than this, then something has gone wrong.
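// (Derivation sketch, not in the original comments: by the birthday
// bound, uniformly random keys of sck_keep_bits bits first collide after
// roughly sqrt(2**sck_keep_bits) = 2**(sck_keep_bits/2) files, and
// 1.414214**sck_keep_bits is exactly that quantity; dividing by files
// per day converts the count to days.)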
printf(
"Without ejection, expect random collision after %g days (%g "
"corrected)\n",
without_ejection, without_ejection * multiplier_);
double with_full_table =
std::pow(2.0, FLAGS_sck_keep_bits - FLAGS_sck_table_bits) /
FLAGS_sck_files_per_day;
// This is an alternate lower bound for -sck_randomize, usually pretty
// accurate. Our cache keys should usually perform "better than random"
// but always no worse. (If observation is substantially worse than this,
// then something has gone wrong.)
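// (Derivation sketch, not in the original comments: with a full table of
// 2**sck_table_bits live reduced keys drawn from 2**sck_keep_bits
// possible values, each new key collides with probability about
// 2**(sck_table_bits - sck_keep_bits), so the expected wait is
// 2**(sck_keep_bits - sck_table_bits) files, converted to days as above.)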
printf(
"With ejection and full table, expect random collision after %g "
"days (%g corrected)\n",
with_full_table, with_full_table * multiplier_);
collisions_ = 0;
// Run until sufficient number of observed collisions.
for (int i = 1; collisions_ < FLAGS_sck_min_collision; i++) {
RunOnce();
if (collisions_ == 0) {
printf(
"No collisions after %d x %u days "
" \n",
i, FLAGS_sck_days_per_run);
} else {
double est = 1.0 * i * FLAGS_sck_days_per_run / collisions_;
printf("%" PRIu64
" collisions after %d x %u days, est %g days between (%g "
"corrected) \n",
collisions_, i, FLAGS_sck_days_per_run, est, est * multiplier_);
}
}
}
void RunOnce() {
// Re-initialized simulated state
const size_t db_count = FLAGS_sck_db_count;
dbs_.reset(new TableProperties[db_count]{});
const size_t table_mask = (size_t{1} << FLAGS_sck_table_bits) - 1;
table_.reset(new uint64_t[table_mask + 1]{});
if (FLAGS_sck_keep_bits > 64) {
FLAGS_sck_keep_bits = 64;
}
// Details of which bits are dropped in reduction
uint32_t shift_away = 64 - FLAGS_sck_keep_bits;
// Shift away fewer potential file number bits (b) than potential
// session counter bits (a).
uint32_t shift_away_b = shift_away / 3;
uint32_t shift_away_a = shift_away - shift_away_b;
process_count_ = 0;
session_count_ = 0;
ResetProcess();
Random64 r{std::random_device{}()};
uint64_t max_file_count =
uint64_t{FLAGS_sck_files_per_day} * FLAGS_sck_days_per_run;
uint64_t file_size = FLAGS_sck_file_size_mb * uint64_t{1024} * 1024U;
uint32_t report_count = 0;
uint32_t collisions_this_run = 0;
size_t db_i = 0;
for (uint64_t file_count = 1; file_count <= max_file_count;
++file_count, ++db_i) {
// Round-robin through DBs (this is faster than %)
if (db_i >= db_count) {
db_i = 0;
}
// Any other periodic actions before simulating next file
if (!FLAGS_sck_footer_unique_id && r.OneIn(FLAGS_sck_reopen_nfiles)) {
ResetSession(db_i);
} else if (r.OneIn(restart_nfiles_)) {
ResetProcess();
}
// Simulate next file
OffsetableCacheKey ock;
dbs_[db_i].orig_file_number += 1;
// Skip some file numbers to account for other file kinds, except in
// footer unique ID mode, where orig_file_number tracks the process-wide
// count of generated SST files.
if (!FLAGS_sck_footer_unique_id) {
dbs_[db_i].orig_file_number += (r.Next() & 3);
}
bool is_stable;
BlockBasedTable::SetupBaseCacheKey(&dbs_[db_i], /* ignored */ "",
/* ignored */ 42, file_size, &ock,
&is_stable);
assert(is_stable);
// Get a representative cache key, which later we analytically generalize
// to a range.
CacheKey ck = ock.WithOffset(0);
uint64_t reduced_key;
if (FLAGS_sck_randomize) {
reduced_key = GetSliceHash64(ck.AsSlice()) >> shift_away;
} else if (FLAGS_sck_footer_unique_id) {
// Special case: keep only file number, not session counter
uint32_t a = DecodeFixed32(ck.AsSlice().data() + 4) >> shift_away_a;
uint32_t b = DecodeFixed32(ck.AsSlice().data() + 12) >> shift_away_b;
reduced_key = (uint64_t{a} << 32) + b;
} else {
// Try to keep file number and session counter (shift away other bits)
uint32_t a = DecodeFixed32(ck.AsSlice().data()) << shift_away_a;
uint32_t b = DecodeFixed32(ck.AsSlice().data() + 12) >> shift_away_b;
reduced_key = (uint64_t{a} << 32) + b;
}
if (reduced_key == 0) {
// Unlikely, but we need to exclude tracking this value because we
// use it to mean "empty" in table. This case is OK as long as we
// don't hit it often.
printf("Hit Zero! \n");
file_count--;
continue;
}
uint64_t h =
NPHash64(reinterpret_cast<char*>(&reduced_key), sizeof(reduced_key));
// Skew expected lifetimes, for high (super-Poisson) variance in actual
// lifetimes: taking the min of two candidate positions favors low slots,
// which are therefore overwritten quickly, while entries landing in high
// slots tend to survive much longer.
size_t pos =
std::min(Lower32of64(h) & table_mask, Upper32of64(h) & table_mask);
if (table_[pos] == reduced_key) {
collisions_this_run++;
// Our goal is to predict probability of no collisions, not expected
// number of collisions. To make the distinction, we have to get rid
// of observing correlated collisions, which this takes care of:
ResetProcess();
} else {
// Replace (end of lifetime for file that was in this slot)
table_[pos] = reduced_key;
}
if (++report_count == FLAGS_sck_files_per_day) {
report_count = 0;
// Estimate fill %
size_t incr = table_mask / 1000;
size_t sampled_count = 0;
for (size_t i = 0; i <= table_mask; i += incr) {
if (table_[i] != 0) {
sampled_count++;
}
}
// Report
printf(
"%" PRIu64 " days, %" PRIu64 " proc, %" PRIu64
" sess, %u coll, occ %g%%, ejected %g%% \r",
file_count / FLAGS_sck_files_per_day, process_count_,
session_count_, collisions_this_run, 100.0 * sampled_count / 1000.0,
100.0 * (1.0 - sampled_count / 1000.0 * table_mask / file_count));
fflush(stdout);
}
}
collisions_ += collisions_this_run;
}
void ResetSession(size_t i) {
dbs_[i].db_session_id = DBImpl::GenerateDbSessionId(nullptr);
session_count_++;
}
void ResetProcess() {
process_count_++;
DBImpl::TEST_ResetDbSessionIdGen();
for (size_t i = 0; i < FLAGS_sck_db_count; ++i) {
ResetSession(i);
}
if (FLAGS_sck_footer_unique_id) {
// For footer unique ID, this tracks process-wide generated SST file
// count.
dbs_[0].orig_file_number = 0;
}
}
private:
// Use db_session_id and orig_file_number from TableProperties
std::unique_ptr<TableProperties[]> dbs_;
std::unique_ptr<uint64_t[]> table_;
uint64_t process_count_ = 0;
uint64_t session_count_ = 0;
uint64_t collisions_ = 0;
uint32_t restart_nfiles_ = 0;
double multiplier_ = 0.0;
};
int cache_bench_tool(int argc, char** argv) {
ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_stress_cache_key) {
// Alternate tool
StressCacheKey().Run();
return 0;
}
if (FLAGS_threads <= 0) {
fprintf(stderr, "threads number <= 0\n");
exit(1);
}
ROCKSDB_NAMESPACE::CacheBench bench;
if (FLAGS_populate_cache) {
bench.PopulateCache();
printf("Population complete\n");
printf("----------------------------\n");
}
if (bench.Run()) {
return 0;
} else {
return 1;
}
}
} // namespace ROCKSDB_NAMESPACE
#endif // GFLAGS


@ -1,128 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/cache_entry_roles.h"
#include <mutex>
#include "port/lang.h"
namespace ROCKSDB_NAMESPACE {
std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
"DataBlock",
"FilterBlock",
"FilterMetaBlock",
"DeprecatedFilterBlock",
"IndexBlock",
"OtherBlock",
"WriteBuffer",
"CompressionDictionaryBuildingBuffer",
"FilterConstruction",
"BlockBasedTableReader",
"Misc",
}};
std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
"data-block",
"filter-block",
"filter-meta-block",
"deprecated-filter-block",
"index-block",
"other-block",
"write-buffer",
"compression-dictionary-building-buffer",
"filter-construction",
"block-based-table-reader",
"misc",
}};
const std::string& GetCacheEntryRoleName(CacheEntryRole role) {
return kCacheEntryRoleToHyphenString[static_cast<size_t>(role)];
}
const std::string& BlockCacheEntryStatsMapKeys::CacheId() {
static const std::string kCacheId = "id";
return kCacheId;
}
const std::string& BlockCacheEntryStatsMapKeys::CacheCapacityBytes() {
static const std::string kCacheCapacityBytes = "capacity";
return kCacheCapacityBytes;
}
const std::string&
BlockCacheEntryStatsMapKeys::LastCollectionDurationSeconds() {
static const std::string kLastCollectionDurationSeconds =
"secs_for_last_collection";
return kLastCollectionDurationSeconds;
}
const std::string& BlockCacheEntryStatsMapKeys::LastCollectionAgeSeconds() {
static const std::string kLastCollectionAgeSeconds =
"secs_since_last_collection";
return kLastCollectionAgeSeconds;
}
namespace {
std::string GetPrefixedCacheEntryRoleName(const std::string& prefix,
CacheEntryRole role) {
const std::string& role_name = GetCacheEntryRoleName(role);
std::string prefixed_role_name;
prefixed_role_name.reserve(prefix.size() + role_name.size());
prefixed_role_name.append(prefix);
prefixed_role_name.append(role_name);
return prefixed_role_name;
}
} // namespace
std::string BlockCacheEntryStatsMapKeys::EntryCount(CacheEntryRole role) {
const static std::string kPrefix = "count.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
std::string BlockCacheEntryStatsMapKeys::UsedBytes(CacheEntryRole role) {
const static std::string kPrefix = "bytes.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
std::string BlockCacheEntryStatsMapKeys::UsedPercent(CacheEntryRole role) {
const static std::string kPrefix = "percent.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
namespace {
struct Registry {
std::mutex mutex;
UnorderedMap<Cache::DeleterFn, CacheEntryRole> role_map;
void Register(Cache::DeleterFn fn, CacheEntryRole role) {
std::lock_guard<std::mutex> lock(mutex);
role_map[fn] = role;
}
UnorderedMap<Cache::DeleterFn, CacheEntryRole> Copy() {
std::lock_guard<std::mutex> lock(mutex);
return role_map;
}
};
Registry& GetRegistry() {
STATIC_AVOID_DESTRUCTION(Registry, registry);
return registry;
}
} // namespace
void RegisterCacheDeleterRole(Cache::DeleterFn fn, CacheEntryRole role) {
GetRegistry().Register(fn, role);
}
UnorderedMap<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap() {
return GetRegistry().Copy();
}
} // namespace ROCKSDB_NAMESPACE


@ -1,103 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <array>
#include <cstdint>
#include <memory>
#include <type_traits>
#include "rocksdb/cache.h"
#include "util/hash_containers.h"
namespace ROCKSDB_NAMESPACE {
extern std::array<std::string, kNumCacheEntryRoles>
kCacheEntryRoleToCamelString;
extern std::array<std::string, kNumCacheEntryRoles>
kCacheEntryRoleToHyphenString;
// To associate cache entries with their role, we use a hack on the
// existing Cache interface. Because the deleter of an entry can authenticate
// the code origin of an entry, we can elaborate the choice of deleter to
// also encode role information, without inferring false role information
// from entries not choosing to encode a role.
//
// The rest of this file is for handling mappings between deleters and
// roles.
// To infer a role from a deleter, the deleter must be registered. This
// can be done "manually" with this function. This function is thread-safe,
// and the registration mappings go into private but static storage. (Note
// that DeleterFn is a function pointer, not std::function. Registrations
// should not be too many.)
void RegisterCacheDeleterRole(Cache::DeleterFn fn, CacheEntryRole role);
// Gets a copy of the registered deleter -> role mappings. This is the only
// function for reading the mappings made with RegisterCacheDeleterRole.
// Why only this interface for reading?
// * This function has to be thread safe, which could incur substantial
// overhead. We should not pay this overhead for every deleter look-up.
// * This is suitable for preparing for batch operations, like with
// CacheEntryStatsCollector.
// * The number of mappings should be sufficiently small (dozens).
UnorderedMap<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap();
// ************************************************************** //
// An automatic registration infrastructure. This enables code
// to simply ask for a deleter associated with a particular type
// and role, and registration is automatic. In a sense, this is
// a small dependency injection infrastructure, because linking
// in new deleter instantiations is essentially sufficient for
// making stats collection (using CopyCacheDeleterRoleMap) aware
// of them.
namespace cache_entry_roles_detail {
template <typename T, CacheEntryRole R>
struct RegisteredDeleter {
RegisteredDeleter() { RegisterCacheDeleterRole(Delete, R); }
// These have global linkage to help ensure compiler optimizations do not
// break uniqueness for each <T,R>
static void Delete(const Slice& /* key */, void* value) {
// Supports T == Something[], unlike delete operator
std::default_delete<T>()(
static_cast<typename std::remove_extent<T>::type*>(value));
}
};
template <CacheEntryRole R>
struct RegisteredNoopDeleter {
RegisteredNoopDeleter() { RegisterCacheDeleterRole(Delete, R); }
static void Delete(const Slice& /* key */, void* /* value */) {
// Here was `assert(value == nullptr);` but we can also put pointers
// to static data in Cache, for testing at least.
}
};
} // namespace cache_entry_roles_detail
// Get an automatically registered deleter for value type T and role R.
// Based on C++ semantics, registration is invoked exactly once in a
// thread-safe way on first call to this function, for each <T, R>.
template <typename T, CacheEntryRole R>
Cache::DeleterFn GetCacheEntryDeleterForRole() {
static cache_entry_roles_detail::RegisteredDeleter<T, R> reg;
return reg.Delete;
}
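// Example usage (a sketch, assuming `block` is a heap-allocated Block*
// being inserted with charge `charge`):
//   cache->Insert(key, block, charge,
//                 GetCacheEntryDeleterForRole<Block,
//                                             CacheEntryRole::kDataBlock>());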
// Get an automatically registered no-op deleter (value should be nullptr)
// and associated with role R. This is used for Cache "reservation" entries
// such as for WriteBufferManager.
template <CacheEntryRole R>
Cache::DeleterFn GetNoopDeleterForRole() {
static cache_entry_roles_detail::RegisteredNoopDeleter<R> reg;
return reg.Delete;
}
} // namespace ROCKSDB_NAMESPACE


@ -1,183 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <array>
#include <cstdint>
#include <memory>
#include <mutex>
#include "cache/cache_helpers.h"
#include "cache/cache_key.h"
#include "port/lang.h"
#include "rocksdb/cache.h"
#include "rocksdb/status.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
#include "util/coding_lean.h"
namespace ROCKSDB_NAMESPACE {
// A generic helper object for gathering stats about cache entries by
// iterating over them with ApplyToAllEntries. This class essentially
// solves the problem of slowing down a Cache with too many stats
// collectors that could be sharing stat results, such as from multiple
// column families or multiple DBs sharing a Cache. We employ a few
// mitigations:
// * Only one collector for a particular kind of Stats is alive
// for each Cache. This is guaranteed using the Cache itself to hold
// the collector.
// * A mutex ensures only one thread is gathering stats for this
// collector.
// * The most recent gathered stats are saved and simply copied to
// satisfy requests within a time window (default: 3 minutes) of
// completion of the most recent stat gathering.
//
// Template parameter Stats must be copyable and trivially constructible,
// as well as...
// concept Stats {
// // Notification before applying callback to all entries
// void BeginCollection(Cache*, SystemClock*, uint64_t start_time_micros);
// // Get the callback to apply to all entries. `callback`
// // type must be compatible with Cache::ApplyToAllEntries
// callback GetEntryCallback();
// // Notification after applying callback to all entries
// void EndCollection(Cache*, SystemClock*, uint64_t end_time_micros);
// // Notification that a collection was skipped because of
// // sufficiently recent saved results.
// void SkippedCollection();
// }
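// A minimal conforming Stats might look like this (sketch only;
// `CountingStats` is hypothetical):
// struct CountingStats {
//   uint64_t entry_count = 0;
//   void BeginCollection(Cache*, SystemClock*, uint64_t) {
//     entry_count = 0;
//   }
//   auto GetEntryCallback() {
//     return [this](const Slice&, void*, size_t, Cache::DeleterFn) {
//       ++entry_count;
//     };
//   }
//   void EndCollection(Cache*, SystemClock*, uint64_t) {}
//   void SkippedCollection() {}
// };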
template <class Stats>
class CacheEntryStatsCollector {
public:
// Gather and save stats if saved stats are too old. (Use GetStats() to
// read saved stats.)
//
// Maximum allowed age for a "hit" on saved results is determined by the
// two interval parameters. Setting both to 0 forces a re-scan. For example
// with min_interval_seconds=300 and min_interval_factor=100, if the last
// scan took 10s, we would only rescan ("miss") if the age in seconds of
// the saved results is > max(300, 100*10).
// Justification: scans can vary wildly in duration, e.g. from 0.02 sec
// to as much as 20 seconds, so we want to be able to cap the absolute
// and relative frequency of scans.
void CollectStats(int min_interval_seconds, int min_interval_factor) {
// Waits for any pending reader or writer (collector)
std::lock_guard<std::mutex> lock(working_mutex_);
uint64_t max_age_micros =
static_cast<uint64_t>(std::max(min_interval_seconds, 0)) * 1000000U;
if (last_end_time_micros_ > last_start_time_micros_ &&
min_interval_factor > 0) {
max_age_micros = std::max(
max_age_micros, min_interval_factor * (last_end_time_micros_ -
last_start_time_micros_));
}
uint64_t start_time_micros = clock_->NowMicros();
if ((start_time_micros - last_end_time_micros_) > max_age_micros) {
last_start_time_micros_ = start_time_micros;
working_stats_.BeginCollection(cache_, clock_, start_time_micros);
cache_->ApplyToAllEntries(working_stats_.GetEntryCallback(), {});
TEST_SYNC_POINT_CALLBACK(
"CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries", nullptr);
uint64_t end_time_micros = clock_->NowMicros();
last_end_time_micros_ = end_time_micros;
working_stats_.EndCollection(cache_, clock_, end_time_micros);
} else {
working_stats_.SkippedCollection();
}
// Save so that we don't need to wait for an outstanding collection in
// order to make a copy of the last saved stats
std::lock_guard<std::mutex> lock2(saved_mutex_);
saved_stats_ = working_stats_;
}
// Gets saved stats, regardless of age
void GetStats(Stats *stats) {
std::lock_guard<std::mutex> lock(saved_mutex_);
*stats = saved_stats_;
}
Cache *GetCache() const { return cache_; }
// Gets or creates a shared instance of CacheEntryStatsCollector in the
// cache itself, and saves into `ptr`. This shared_ptr will hold the
// entry in cache until all refs are destroyed.
static Status GetShared(Cache *cache, SystemClock *clock,
std::shared_ptr<CacheEntryStatsCollector> *ptr) {
const Slice &cache_key = GetCacheKey();
Cache::Handle *h = cache->Lookup(cache_key);
if (h == nullptr) {
// Not yet in cache, but Cache doesn't provide a built-in way to
// avoid racing insert. So we double-check under a shared mutex,
// inspired by TableCache.
STATIC_AVOID_DESTRUCTION(std::mutex, static_mutex);
std::lock_guard<std::mutex> lock(static_mutex);
h = cache->Lookup(cache_key);
if (h == nullptr) {
auto new_ptr = new CacheEntryStatsCollector(cache, clock);
// TODO: non-zero charge causes some tests that count block cache
// usage to go flaky. Fix the problem somehow so we can use an
// accurate charge.
size_t charge = 0;
Status s = cache->Insert(cache_key, new_ptr, charge, Deleter, &h,
Cache::Priority::HIGH);
if (!s.ok()) {
assert(h == nullptr);
delete new_ptr;
return s;
}
}
}
// If we reach here, shared entry is in cache with handle `h`.
assert(cache->GetDeleter(h) == Deleter);
// Build an aliasing shared_ptr that keeps `ptr` in cache while there
// are references.
*ptr = MakeSharedCacheHandleGuard<CacheEntryStatsCollector>(cache, h);
return Status::OK();
}
private:
explicit CacheEntryStatsCollector(Cache *cache, SystemClock *clock)
: saved_stats_(),
working_stats_(),
last_start_time_micros_(0),
last_end_time_micros_(/*pessimistic*/ 10000000),
cache_(cache),
clock_(clock) {}
static void Deleter(const Slice &, void *value) {
delete static_cast<CacheEntryStatsCollector *>(value);
}
static const Slice &GetCacheKey() {
// For each template instantiation
static CacheKey ckey = CacheKey::CreateUniqueForProcessLifetime();
static Slice ckey_slice = ckey.AsSlice();
return ckey_slice;
}
std::mutex saved_mutex_;
Stats saved_stats_;
std::mutex working_mutex_;
Stats working_stats_;
uint64_t last_start_time_micros_;
uint64_t last_end_time_micros_;
Cache *const cache_;
SystemClock *const clock_;
};
} // namespace ROCKSDB_NAMESPACE

cache/cache_helpers.h

@ -1,125 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cassert>
#include "rocksdb/cache.h"
#include "rocksdb/rocksdb_namespace.h"
namespace ROCKSDB_NAMESPACE {
// Returns the cached value given a cache handle.
template <typename T>
T* GetFromCacheHandle(Cache* cache, Cache::Handle* handle) {
assert(cache);
assert(handle);
return static_cast<T*>(cache->Value(handle));
}
// Simple generic deleter for Cache (to be used with Cache::Insert).
template <typename T>
void DeleteCacheEntry(const Slice& /* key */, void* value) {
delete static_cast<T*>(value);
}
// Turns a T* into a Slice so it can be used as a key with Cache.
template <typename T>
Slice GetSlice(const T* t) {
return Slice(reinterpret_cast<const char*>(t), sizeof(T));
}
// Generic resource management object for cache handles that releases the handle
// when destroyed. Has unique ownership of the handle, so copying it is not
// allowed, while moving it transfers ownership.
template <typename T>
class CacheHandleGuard {
public:
CacheHandleGuard() = default;
CacheHandleGuard(Cache* cache, Cache::Handle* handle)
: cache_(cache),
handle_(handle),
value_(GetFromCacheHandle<T>(cache, handle)) {
assert(cache_ && handle_ && value_);
}
CacheHandleGuard(const CacheHandleGuard&) = delete;
CacheHandleGuard& operator=(const CacheHandleGuard&) = delete;
CacheHandleGuard(CacheHandleGuard&& rhs) noexcept
: cache_(rhs.cache_), handle_(rhs.handle_), value_(rhs.value_) {
assert((!cache_ && !handle_ && !value_) || (cache_ && handle_ && value_));
rhs.ResetFields();
}
CacheHandleGuard& operator=(CacheHandleGuard&& rhs) noexcept {
if (this == &rhs) {
return *this;
}
ReleaseHandle();
cache_ = rhs.cache_;
handle_ = rhs.handle_;
value_ = rhs.value_;
assert((!cache_ && !handle_ && !value_) || (cache_ && handle_ && value_));
rhs.ResetFields();
return *this;
}
~CacheHandleGuard() { ReleaseHandle(); }
bool IsEmpty() const { return !handle_; }
Cache* GetCache() const { return cache_; }
Cache::Handle* GetCacheHandle() const { return handle_; }
T* GetValue() const { return value_; }
void Reset() {
ReleaseHandle();
ResetFields();
}
private:
void ReleaseHandle() {
if (IsEmpty()) {
return;
}
assert(cache_);
cache_->Release(handle_);
}
void ResetFields() {
cache_ = nullptr;
handle_ = nullptr;
value_ = nullptr;
}
private:
Cache* cache_ = nullptr;
Cache::Handle* handle_ = nullptr;
T* value_ = nullptr;
};
// Build an aliasing shared_ptr that keeps `handle` in cache while there
// are references, but the pointer is to the value for that cache entry,
// which must be of type T. This is copyable, unlike CacheHandleGuard, but
// does not provide access to caching details.
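// Usage sketch (assuming `handle` holds a Block*): the returned
// shared_ptr points at the cached value but shares its control block
// with the CacheHandleGuard wrapper, so Cache::Release runs only when
// the last copy is destroyed:
//   std::shared_ptr<Block> block =
//       MakeSharedCacheHandleGuard<Block>(cache, handle);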
template <typename T>
std::shared_ptr<T> MakeSharedCacheHandleGuard(Cache* cache,
Cache::Handle* handle) {
auto wrapper = std::make_shared<CacheHandleGuard<T>>(cache, handle);
return std::shared_ptr<T>(wrapper, static_cast<T*>(cache->Value(handle)));
}
} // namespace ROCKSDB_NAMESPACE

cache/cache_key.cc

@ -1,344 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/cache_key.h"
#include <algorithm>
#include <atomic>
#include "rocksdb/cache.h"
#include "table/unique_id_impl.h"
#include "util/hash.h"
#include "util/math.h"
namespace ROCKSDB_NAMESPACE {
// Value space plan for CacheKey:
//
// session_etc64_ | offset_etc64_ | Only generated by
// ---------------+---------------+------------------------------------------
// 0 | 0 | Reserved for "empty" CacheKey()
// 0 | > 0, < 1<<63 | CreateUniqueForCacheLifetime
// 0 | >= 1<<63 | CreateUniqueForProcessLifetime
// > 0 | any | OffsetableCacheKey.WithOffset
CacheKey CacheKey::CreateUniqueForCacheLifetime(Cache *cache) {
// +1 so that we can reserve all zeros for "unset" cache key
uint64_t id = cache->NewId() + 1;
// Ensure we don't collide with CreateUniqueForProcessLifetime
assert((id >> 63) == 0U);
return CacheKey(0, id);
}
CacheKey CacheKey::CreateUniqueForProcessLifetime() {
// To avoid colliding with CreateUniqueForCacheLifetime, assuming
// Cache::NewId counts up from zero, here we count down from UINT64_MAX.
// If this ever becomes a point of contention, we could sub-divide the
// space and use CoreLocalArray.
static std::atomic<uint64_t> counter{UINT64_MAX};
uint64_t id = counter.fetch_sub(1, std::memory_order_relaxed);
// Ensure we don't collide with CreateUniqueForCacheLifetime
assert((id >> 63) == 1U);
return CacheKey(0, id);
}
// Value plan for CacheKeys from OffsetableCacheKey, assuming that
// db_session_ids are generated from a base_session_id and
// session_id_counter (by SemiStructuredUniqueIdGen+EncodeSessionId
// in DBImpl::GenerateDbSessionId):
//
// Conceptual inputs:
// db_id (unstructured, from GenerateRawUniqueId or equiv)
// * could be shared between cloned DBs but rare
// * could be constant, if session id suffices
// base_session_id (unstructured, from GenerateRawUniqueId)
// session_id_counter (structured)
// * usually much smaller than 2**24
// file_number (structured)
// * usually smaller than 2**24
// offset_in_file (structured, might skip lots of values)
// * usually smaller than 2**32
// max_offset determines placement of file_number to prevent
// overlapping with offset
//
// Outputs come from bitwise-xor of the constituent pieces, low bits on left:
//
// |------------------------- session_etc64 -------------------------|
// | +++++++++++++++ base_session_id (lower 64 bits) +++++++++++++++ |
// |-----------------------------------------------------------------|
// | session_id_counter ...| |
// |-----------------------------------------------------------------|
// | | ... file_number |
// | | overflow & meta |
// |-----------------------------------------------------------------|
//
//
// |------------------------- offset_etc64 --------------------------|
// | hash of: ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
// | * base_session_id (upper ~39 bits) |
// | * db_id (~122 bits entropy) |
// |-----------------------------------------------------------------|
// | offset_in_file ............... | |
// |-----------------------------------------------------------------|
// | | file_number, 0-3 |
// | | lower bytes |
// |-----------------------------------------------------------------|
//
// Based on max_offset, a maximal number of bytes 0..3 is chosen for
// including from lower bits of file_number in offset_etc64. The choice
// is encoded in two bits of metadata going into session_etc64, though
// the common case of 3 bytes is encoded as 0 so that session_etc64
// is unmodified by file_number concerns in the common case.
//
// There is nothing preventing "file number overflow & meta" from meeting
// and overlapping with session_id_counter, but reaching such a case requires
// an intractable combination of large file offsets (thus at least some large
// files), large file numbers (thus large number of files generated), and
// large number of session IDs generated in a single process. A trillion each
// (2**40) of session ids, offsets, and file numbers comes to 120 bits.
// With two bits of metadata and byte granularity, this is on the verge of
// overlap, but even in the overlap case, it doesn't seem likely that
// a file from billions of files or session ids ago will still be live
// or cached.
//
// In fact, if our SST files are all < 4TB (see
// BlockBasedTable::kMaxFileSizeStandardEncoding), then SST files generated
// in a single process are guaranteed to have unique cache keys, unless/until
// number session ids * max file number = 2**86, e.g. 1 trillion DB::Open in
// a single process and 64 trillion files generated. Even at that point, to
// see a collision we would need a miraculous re-synchronization of session
// id and file number, along with a live file or stale cache entry from
// trillions of files ago.
//
// How https://github.com/pdillinger/unique_id applies here:
// Every bit of output always includes "unstructured" uniqueness bits and
// often combines with "structured" uniqueness bits. The "unstructured" bits
// change infrequently: only when we cannot guarantee our state tracking for
// "structured" uniqueness hasn't been cloned. Using a static
// SemiStructuredUniqueIdGen for db_session_ids, this means we only get an
// "all new" session id when a new process uses RocksDB. (Between processes,
// we don't know if a DB or other persistent storage has been cloned. We
// assume that if VM hot cloning is used, subsequently generated SST files
// do not interact.) Within a process, only the session_lower of the
// db_session_id changes incrementally ("structured" uniqueness).
//
// This basically means that our offsets, counters and file numbers allow us
// to do somewhat "better than random" (birthday paradox) while in the
// degenerate case of completely new session for each tiny file, we still
// have strong uniqueness properties from the birthday paradox, with ~103
// bit session IDs or up to 128 bits entropy with different DB IDs sharing a
// cache.
//
// More collision probability analysis:
// Suppose a RocksDB host generates (generously) 2 GB/s (10TB data, 17 DWPD)
// with average process/session lifetime of (pessimistically) 4 minutes.
// In 180 days (generous allowable data lifespan), we generate 31 million GB
// of data, or 2^55 bytes, and 2^16 "all new" session IDs.
//
// First, suppose this is in a single DB (lifetime 180 days):
// 128 bits cache key size
// - 55 <- ideal size for byte offsets + file numbers
// - 2 <- bits for offsets and file numbers not exactly powers of two
// - 2 <- bits for file number encoding metadata
// + 2 <- bits saved not using byte offsets in BlockBasedTable::GetCacheKey
// ----
// 71 <- bits remaining for distinguishing session IDs
// The probability of a collision in 71 bits of session ID data is less than
// 1 in 2**(71 - (2 * 16)), or roughly 1 in a trillion. And this assumes all
// data from the last 180 days is in cache for potential collision, and that
// cache keys under each session id exhaustively cover the remaining 57 bits
// while in reality they'll only cover a small fraction of it.
//
// Although data could be transferred between hosts, each host has its own
// cache and we are already assuming a high rate of "all new" session ids.
// So this doesn't really change the collision calculation. Across a fleet
// of 1 million, each with <1 in a trillion collision possibility,
// fleetwide collision probability is <1 in a million.
//
// Now suppose we have many DBs per host, say 2**10, with same host-wide write
// rate and process/session lifetime. File numbers will be ~10 bits smaller
// and we will have 2**10 times as many session IDs because of simultaneous
// lifetimes. So now collision chance is less than 1 in 2**(81 - (2 * 26)),
// or roughly 1 in a billion.
//
// Suppose instead we generated random or hashed cache keys for each
// (compressed) block. For 1KB compressed block size, that is 2^45 cache keys
// in 180 days. Collision probability is more easily estimated at roughly
// 1 in 2**(128 - (2 * 45)) or roughly 1 in a trillion (assuming all
// data from the last 180 days is in cache, but NOT the other assumption
// for the 1 in a trillion estimate above).
//
//
// Collision probability estimation through simulation:
// A tool ./cache_bench -stress_cache_key broadly simulates host-wide cache
// activity over many months, by making some pessimistic simplifying
// assumptions. See class StressCacheKey in cache_bench_tool.cc for details.
// Here is some sample output with
// `./cache_bench -stress_cache_key -sck_keep_bits=40`:
//
// Total cache or DBs size: 32TiB Writing 925.926 MiB/s or 76.2939TiB/day
// Multiply by 9.22337e+18 to correct for simulation losses (but still
// assume whole file cached)
//
// These come from default settings of 2.5M files per day of 32 MB each, and
// `-sck_keep_bits=40` means that to represent a single file, we are only
// keeping 40 bits of the 128-bit (base) cache key. With file size of 2**25
// contiguous keys (pessimistic), our simulation is about 2**(128-40-25) or
// about 9 billion billion times more prone to collision than reality.
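// (2**(128-40-25) == 2**63 ~= 9.22337e18, which is exactly the correction
// factor printed above.)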
//
// More default assumptions, relatively pessimistic:
// * 100 DBs in same process (doesn't matter much)
// * Re-open DB in same process (new session ID related to old session ID) on
// average every 100 files generated
// * Restart process (all new session IDs unrelated to old) 24 times per day
//
// After enough data, we get a result at the end (-sck_keep_bits=40):
//
// (keep 40 bits) 17 collisions after 2 x 90 days, est 10.5882 days between
// (9.76592e+19 corrected)
//
// If we believe the (pessimistic) simulation and the mathematical
// extrapolation, we would need to run a billion machines all for 97 billion
// days to expect a cache key collision. To help verify that our extrapolation
// ("corrected") is robust, we can make our simulation more precise with
// `-sck_keep_bits=41` and `42`, which takes more running time to get enough
// collision data:
//
// (keep 41 bits) 16 collisions after 4 x 90 days, est 22.5 days between
// (1.03763e+20 corrected)
// (keep 42 bits) 19 collisions after 10 x 90 days, est 47.3684 days between
// (1.09224e+20 corrected)
//
// The extrapolated prediction is very close. If anything, we might have some
// very small losses of structured data (see class StressCacheKey in
// cache_bench_tool.cc) leading to a more accurate & more attractive
// prediction with more bits kept.
//
// With the `-sck_randomize` option, we can see that typical workloads like
// above have lower collision probability than "random" cache keys (note:
// offsets still non-randomized) by a modest amount (roughly 20x less collision
// prone than random), which should make us reasonably comfortable even in
// "degenerate" cases (e.g. repeatedly launch a process to generate 1 file
// with SstFileWriter):
//
// (rand 40 bits) 197 collisions after 1 x 90 days, est 0.456853 days between
// (4.21372e+18 corrected)
//
// We can see that with more frequent process restarts (all new session IDs),
// we get closer to the "random" cache key performance:
//
// (-sck_restarts_per_day=5000): 140 collisions after 1 x 90 days, ...
// (5.92931e+18 corrected)
//
// Other tests have been run to validate other conditions behave as expected,
// never behaving "worse than random" unless we start chopping off structured
// data.
//
//
// Conclusion: Even in extreme cases, rapidly burning through "all new" IDs
// that only arise when a new process is started, the chance of any cache key
// collisions in a giant fleet of machines is negligible. Especially when
// processes live for hours or days, the chance of a cache key collision is
// likely more plausibly due to bad hardware than to bad luck in random
// session ID data. Software defects are surely more likely to cause corruption
// than both of those.
//
// TODO: Nevertheless / regardless, an efficient way to detect (and thus
// quantify) block cache corruptions, including collisions, should be added.
OffsetableCacheKey::OffsetableCacheKey(const std::string &db_id,
const std::string &db_session_id,
uint64_t file_number,
uint64_t max_offset) {
#ifndef NDEBUG
max_offset_ = max_offset;
#endif
// Closely related to GetSstInternalUniqueId, but only need 128 bits and
// need to include an offset within the file.
// See also https://github.com/pdillinger/unique_id for background.
uint64_t session_upper = 0; // Assignment to appease clang-analyze
uint64_t session_lower = 0; // Assignment to appease clang-analyze
{
Status s = DecodeSessionId(db_session_id, &session_upper, &session_lower);
if (!s.ok()) {
// A reasonable fallback in case malformed
Hash2x64(db_session_id.data(), db_session_id.size(), &session_upper,
&session_lower);
}
}
// Hash the session upper (~39 bits entropy) and DB id (120+ bits entropy)
// for more global uniqueness entropy.
// (It is possible that many DBs descended from one common DB id are copied
// around and proliferate, in which case session id is critical, but it is
// more common for different DBs to have different DB ids.)
uint64_t db_hash = Hash64(db_id.data(), db_id.size(), session_upper);
// This establishes the db+session id part of the cache key.
//
// Exactly preserve (in common cases; see modifiers below) session lower to
// ensure that session ids generated during the same process lifetime are
// guaranteed unique.
//
// We put this first for CommonPrefixSlice(), so that a small-ish set of
// cache key prefixes covers the entries relevant to any DB.
session_etc64_ = session_lower;
// This provides extra entropy in case of a different DB id or process
// generating a session id, but is also partly/variably obscured by
// file_number and offset (see below).
offset_etc64_ = db_hash;
// Into offset_etc64_ we are (eventually) going to pack & xor in an offset and
// a file_number, but we might need the file_number to overflow into
// session_etc64_. (There must only be one session_etc64_ value per
// file, and preferably shared among many files.)
//
// Figure out how many bytes of file_number we are going to be able to
// pack in with max_offset, though our encoding will only support packing
// in up to 3 bytes of file_number. (16M file numbers is enough for a new
// file number every second for half a year.)
int file_number_bytes_in_offset_etc =
(63 - FloorLog2(max_offset | 0x100000000U)) / 8;
int file_number_bits_in_offset_etc = file_number_bytes_in_offset_etc * 8;
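// Worked example: for any max_offset < 2**32, FloorLog2(max_offset |
// 0x100000000U) == 32, so (63 - 32) / 8 == 3 bytes of file_number can be
// packed. At max_offset == 2**40 - 1 (kMaxOffsetStandardEncoding) this is
// still (63 - 39) / 8 == 3, while at 2**40 it drops to (63 - 40) / 8 == 2.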
// Assert two bits of metadata
assert(file_number_bytes_in_offset_etc >= 0 &&
file_number_bytes_in_offset_etc <= 3);
// Assert we couldn't have used a larger allowed number of bytes (shift
// would chop off bytes).
assert(file_number_bytes_in_offset_etc == 3 ||
(max_offset << (file_number_bits_in_offset_etc + 8) >>
(file_number_bits_in_offset_etc + 8)) != max_offset);
uint64_t mask = (uint64_t{1} << (file_number_bits_in_offset_etc)) - 1;
// Pack into high bits of etc so that offset can go in low bits of etc
// TODO: could be EndianSwapValue?
uint64_t offset_etc_modifier = ReverseBits(file_number & mask);
assert(offset_etc_modifier << file_number_bits_in_offset_etc == 0U);
// Overflow and (3 - byte count) (likely both zero) go into the session_id part
uint64_t session_etc_modifier =
(file_number >> file_number_bits_in_offset_etc << 2) |
static_cast<uint64_t>(3 - file_number_bytes_in_offset_etc);
// Packed into high bits to minimize interference with session id counter.
session_etc_modifier = ReverseBits(session_etc_modifier);
// Assert session_id part is only modified in extreme cases
assert(session_etc_modifier == 0 || file_number > /*3 bytes*/ 0xffffffU ||
max_offset > /*5 bytes*/ 0xffffffffffU);
// Xor in the modifiers
session_etc64_ ^= session_etc_modifier;
offset_etc64_ ^= offset_etc_modifier;
// Although DBImpl guarantees (in recent versions) that session_lower is not
// zero, that's not entirely sufficient to guarantee that session_etc64_ is
// not zero (so that the 0 case can be used by CacheKey::CreateUnique*)
if (session_etc64_ == 0U) {
session_etc64_ = session_upper | 1U;
}
assert(session_etc64_ != 0);
}
} // namespace ROCKSDB_NAMESPACE

132
cache/cache_key.h vendored
View File

@ -1,132 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cstdint>
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/slice.h"
namespace ROCKSDB_NAMESPACE {
class Cache;
// A standard holder for fixed-size block cache keys (and for related caches).
// They are created through one of these, each using its own range of values:
// * CacheKey::CreateUniqueForCacheLifetime
// * CacheKey::CreateUniqueForProcessLifetime
// * Default ctor ("empty" cache key)
// * OffsetableCacheKey->WithOffset
//
// The first two use atomic counters to guarantee uniqueness over the given
// lifetime and the last uses a form of universally unique identifier for
// uniqueness with very high probability (and guaranteed for files generated
// during a single process lifetime).
//
// CacheKeys are currently used by calling AsSlice() to pass as a key to
// Cache. For performance, the keys are endianness-dependent (though otherwise
// portable). (Persistable cache entries are not intended to cross platforms.)
class CacheKey {
public:
// For convenience, constructs an "empty" cache key that is never returned
// by other means.
inline CacheKey() : session_etc64_(), offset_etc64_() {}
inline bool IsEmpty() const {
return (session_etc64_ == 0) & (offset_etc64_ == 0);
}
// Use this cache key as a Slice (byte order is endianness-dependent)
inline Slice AsSlice() const {
static_assert(sizeof(*this) == 16, "Standardized on 16-byte cache key");
assert(!IsEmpty());
return Slice(reinterpret_cast<const char *>(this), sizeof(*this));
}
// Create a CacheKey that is unique among others associated with this Cache
// instance. Depends on Cache::NewId. This is useful for block cache
// "reservations".
static CacheKey CreateUniqueForCacheLifetime(Cache *cache);
// Create a CacheKey that is unique among others for the lifetime of this
// process. This is useful for saving in a static data member so that
// different DB instances can agree on a cache key for shared entities,
// such as for CacheEntryStatsCollector.
static CacheKey CreateUniqueForProcessLifetime();
protected:
friend class OffsetableCacheKey;
CacheKey(uint64_t session_etc64, uint64_t offset_etc64)
: session_etc64_(session_etc64), offset_etc64_(offset_etc64) {}
uint64_t session_etc64_;
uint64_t offset_etc64_;
};
// A file-specific generator of cache keys, sometimes referred to as the
// "base" cache key for a file because all the cache keys for various offsets
// within the file are computed using simple arithmetic. The basis for the
// general approach is discussed here: https://github.com/pdillinger/unique_id
// Heavily related to GetUniqueIdFromTableProperties.
//
// If the db_id, db_session_id, and file_number come from the file's table
// properties, then the keys will be stable across DB::Open/Close, backup/
// restore, import/export, etc.
//
// This class "is a" CacheKey only privately so that it is not misused as
// a ready-to-use CacheKey.
class OffsetableCacheKey : private CacheKey {
public:
// For convenience, constructs an "empty" cache key that should not be used.
inline OffsetableCacheKey() : CacheKey() {}
// Constructs an OffsetableCacheKey with the given information about a file.
// max_offset is based on file size (see WithOffset) and is required here to
// choose an appropriate (sub-)encoding. This constructor never generates an
// "empty" base key.
OffsetableCacheKey(const std::string &db_id, const std::string &db_session_id,
uint64_t file_number, uint64_t max_offset);
inline bool IsEmpty() const {
bool result = session_etc64_ == 0;
assert(!(offset_etc64_ > 0 && result));
return result;
}
// Construct a CacheKey for an offset within a file, which must be
// <= max_offset provided in constructor. An offset is not necessarily a
// byte offset if a smaller unique identifier of keyable offsets is used.
//
// This class was designed to make this hot code extremely fast.
inline CacheKey WithOffset(uint64_t offset) const {
assert(!IsEmpty());
assert(offset <= max_offset_);
return CacheKey(session_etc64_, offset_etc64_ ^ offset);
}
// The "common prefix" is a shared prefix for all the returned CacheKeys,
// that also happens to usually be the same among many files in the same DB,
// so it is efficient and highly (though not perfectly) accurate for
// DB-specific cache dump selection (but not file-specific).
static constexpr size_t kCommonPrefixSize = 8;
inline Slice CommonPrefixSlice() const {
static_assert(sizeof(session_etc64_) == kCommonPrefixSize,
"8 byte common prefix expected");
assert(!IsEmpty());
assert(&this->session_etc64_ == static_cast<const void *>(this));
return Slice(reinterpret_cast<const char *>(this), kCommonPrefixSize);
}
// For any max_offset <= this value, the same encoding scheme is guaranteed.
static constexpr uint64_t kMaxOffsetStandardEncoding = 0xffffffffffU;
private:
#ifndef NDEBUG
uint64_t max_offset_ = 0;
#endif
};
} // namespace ROCKSDB_NAMESPACE
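As a minimal usage sketch of the API above (illustrative only, not part of
the diff: the db_id, db_session_id, and file_number values here are
placeholders; real callers take them from the file's table properties):

#include "cache/cache_key.h"

using ROCKSDB_NAMESPACE::CacheKey;
using ROCKSDB_NAMESPACE::OffsetableCacheKey;

CacheKey ExampleBlockKey(uint64_t block_offset, uint64_t file_size) {
  // Normally one base key is built per file and reused; inline for brevity.
  OffsetableCacheKey base(/*db_id=*/"placeholder-db-id",
                          /*db_session_id=*/"placeholder-session-id",
                          /*file_number=*/42, /*max_offset=*/file_size);
  // The returned key's AsSlice() is what gets passed to Cache::Lookup() and
  // Cache::Insert().
  return base.WithOffset(block_offset);  // requires block_offset <= file_size
}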

183
cache/cache_reservation_manager.cc vendored
View File

@ -1,183 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/cache_reservation_manager.h"
#include <cassert>
#include <cstddef>
#include <cstring>
#include <memory>
#include "cache/cache_entry_roles.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "table/block_based/reader_common.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
template <CacheEntryRole R>
CacheReservationManagerImpl<R>::CacheReservationHandle::CacheReservationHandle(
std::size_t incremental_memory_used,
std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr)
: incremental_memory_used_(incremental_memory_used) {
assert(cache_res_mgr);
cache_res_mgr_ = cache_res_mgr;
}
template <CacheEntryRole R>
CacheReservationManagerImpl<
R>::CacheReservationHandle::~CacheReservationHandle() {
Status s = cache_res_mgr_->ReleaseCacheReservation(incremental_memory_used_);
s.PermitUncheckedError();
}
template <CacheEntryRole R>
CacheReservationManagerImpl<R>::CacheReservationManagerImpl(
std::shared_ptr<Cache> cache, bool delayed_decrease)
: delayed_decrease_(delayed_decrease),
cache_allocated_size_(0),
memory_used_(0) {
assert(cache != nullptr);
cache_ = cache;
}
template <CacheEntryRole R>
CacheReservationManagerImpl<R>::~CacheReservationManagerImpl() {
for (auto* handle : dummy_handles_) {
cache_->Release(handle, true);
}
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::UpdateCacheReservation(
std::size_t new_mem_used) {
memory_used_ = new_mem_used;
std::size_t cur_cache_allocated_size =
cache_allocated_size_.load(std::memory_order_relaxed);
if (new_mem_used == cur_cache_allocated_size) {
return Status::OK();
} else if (new_mem_used > cur_cache_allocated_size) {
Status s = IncreaseCacheReservation(new_mem_used);
return s;
} else {
// In delayed decrease mode, we don't decrease the cache reservation
// until the memory usage drops below 3/4 of what we have reserved
// in the cache.
// We do this because
// (1) inserting a dummy entry into the block cache is expensive, and
// (2) delaying the release of previously inserted dummy entries can save
// such expensive insertions when memory usage increases again in the near
// future, which is likely while usage stays at or above 3/4 of the
// reservation.
if (delayed_decrease_ && new_mem_used >= cur_cache_allocated_size / 4 * 3) {
return Status::OK();
} else {
Status s = DecreaseCacheReservation(new_mem_used);
return s;
}
}
}
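// Example with delayed_decrease == true: with 8 dummy entries reserved
// (cache_allocated_size_ == 8 * kSizeDummyEntry), a decrease to
// new_mem_used == 6 * kSizeDummyEntry is deferred (6 >= 8 * 3/4), while
// new_mem_used == 6 * kSizeDummyEntry - 1 crosses the threshold and
// releases dummy entries down to 6.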
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle>* handle) {
assert(handle);
Status s =
UpdateCacheReservation(GetTotalMemoryUsed() + incremental_memory_used);
(*handle).reset(new CacheReservationManagerImpl::CacheReservationHandle(
incremental_memory_used,
std::enable_shared_from_this<
CacheReservationManagerImpl<R>>::shared_from_this()));
return s;
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::ReleaseCacheReservation(
std::size_t incremental_memory_used) {
assert(GetTotalMemoryUsed() >= incremental_memory_used);
std::size_t updated_total_mem_used =
GetTotalMemoryUsed() - incremental_memory_used;
Status s = UpdateCacheReservation(updated_total_mem_used);
return s;
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::IncreaseCacheReservation(
std::size_t new_mem_used) {
Status return_status = Status::OK();
while (new_mem_used > cache_allocated_size_.load(std::memory_order_relaxed)) {
Cache::Handle* handle = nullptr;
return_status = cache_->Insert(GetNextCacheKey(), nullptr, kSizeDummyEntry,
GetNoopDeleterForRole<R>(), &handle);
if (return_status != Status::OK()) {
return return_status;
}
dummy_handles_.push_back(handle);
cache_allocated_size_ += kSizeDummyEntry;
}
return return_status;
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::DecreaseCacheReservation(
std::size_t new_mem_used) {
Status return_status = Status::OK();
// Decrease to the smallest multiple of kSizeDummyEntry that is greater than
// or equal to new_mem_used. We test new_mem_used + kSizeDummyEntry <=
// cache_allocated_size_ instead of new_mem_used <= cache_allocated_size_ -
// kSizeDummyEntry to avoid size_t underflow when cache_allocated_size_ == 0.
while (new_mem_used + kSizeDummyEntry <=
cache_allocated_size_.load(std::memory_order_relaxed)) {
assert(!dummy_handles_.empty());
auto* handle = dummy_handles_.back();
cache_->Release(handle, true);
dummy_handles_.pop_back();
cache_allocated_size_ -= kSizeDummyEntry;
}
return return_status;
}
template <CacheEntryRole R>
std::size_t CacheReservationManagerImpl<R>::GetTotalReservedCacheSize() {
return cache_allocated_size_.load(std::memory_order_relaxed);
}
template <CacheEntryRole R>
std::size_t CacheReservationManagerImpl<R>::GetTotalMemoryUsed() {
return memory_used_;
}
template <CacheEntryRole R>
Slice CacheReservationManagerImpl<R>::GetNextCacheKey() {
// Calling this function has the side effect of changing the underlying
// cache_key_, which is shared among the keys generated from this
// function. Therefore, make sure any previous keys are saved/copied
// before calling this function.
cache_key_ = CacheKey::CreateUniqueForCacheLifetime(cache_.get());
return cache_key_.AsSlice();
}
template <CacheEntryRole R>
Cache::DeleterFn CacheReservationManagerImpl<R>::TEST_GetNoopDeleterForRole() {
return GetNoopDeleterForRole<R>();
}
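// Explicit instantiations below cover every CacheEntryRole this manager is
// used with, which keeps the template member definitions in this .cc file
// instead of the header.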
template class CacheReservationManagerImpl<
CacheEntryRole::kBlockBasedTableReader>;
template class CacheReservationManagerImpl<
CacheEntryRole::kCompressionDictionaryBuildingBuffer>;
template class CacheReservationManagerImpl<CacheEntryRole::kFilterConstruction>;
template class CacheReservationManagerImpl<CacheEntryRole::kMisc>;
template class CacheReservationManagerImpl<CacheEntryRole::kWriteBuffer>;
} // namespace ROCKSDB_NAMESPACE

288
cache/cache_reservation_manager.h vendored
View File

@ -1,288 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>
#include "cache/cache_entry_roles.h"
#include "cache/cache_key.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
// CacheReservationManager is an interface for reserving cache space to
// account for the memory used
class CacheReservationManager {
public:
// CacheReservationHandle is for managing the lifetime of a cache reservation
// for an incremental amount of memory used (i.e, incremental_memory_used)
class CacheReservationHandle {
public:
virtual ~CacheReservationHandle() {}
};
virtual ~CacheReservationManager() {}
virtual Status UpdateCacheReservation(std::size_t new_memory_used) = 0;
virtual Status MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
*handle) = 0;
virtual std::size_t GetTotalReservedCacheSize() = 0;
virtual std::size_t GetTotalMemoryUsed() = 0;
};
// CacheReservationManagerImpl implements interface CacheReservationManager
// for reserving cache space for the memory used by inserting/releasing dummy
// entries in the cache.
//
// This class is NOT thread-safe, except that GetTotalReservedCacheSize()
// can be called without external synchronization.
template <CacheEntryRole R>
class CacheReservationManagerImpl
: public CacheReservationManager,
public std::enable_shared_from_this<CacheReservationManagerImpl<R>> {
public:
class CacheReservationHandle
: public CacheReservationManager::CacheReservationHandle {
public:
CacheReservationHandle(
std::size_t incremental_memory_used,
std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr);
~CacheReservationHandle() override;
private:
std::size_t incremental_memory_used_;
std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr_;
};
// Construct a CacheReservationManagerImpl
// @param cache The cache where dummy entries are inserted and released for
// reserving cache space
// @param delayed_decrease If set to true, dummy entries won't be released
// immediately when memory usage decreases.
// Instead, they will be released once memory usage
// drops below 3/4 of what we have reserved so far.
// This saves future dummy entry insertions when
// memory usage is likely to increase again in the
// near future.
//
// REQUIRED: cache is not nullptr
explicit CacheReservationManagerImpl(std::shared_ptr<Cache> cache,
bool delayed_decrease = false);
// no copy constructor, copy assignment, move constructor, move assignment
CacheReservationManagerImpl(const CacheReservationManagerImpl &) = delete;
CacheReservationManagerImpl &operator=(const CacheReservationManagerImpl &) =
delete;
CacheReservationManagerImpl(CacheReservationManagerImpl &&) = delete;
CacheReservationManagerImpl &operator=(CacheReservationManagerImpl &&) =
delete;
~CacheReservationManagerImpl() override;
// One of the two ways of reserving/releasing cache space,
// see MakeCacheReservation() for the other.
//
// Use ONLY one of these two ways to prevent unexpected behavior.
//
// Insert and release dummy entries in the cache to
// match the size of total dummy entries with the least multiple of
// kSizeDummyEntry greater than or equal to new_mem_used
//
// Insert dummy entries if new_memory_used > cache_allocated_size_;
//
// Release dummy entries if new_memory_used < cache_allocated_size_
// (and new_memory_used < cache_allocated_size_ * 3/4
// when delayed_decrease is set true);
//
// Keep dummy entries the same if (1) new_memory_used == cache_allocated_size_
// or (2) new_memory_used is in the interval
// [cache_allocated_size_ * 3/4, cache_allocated_size_) when delayed_decrease
// is set to true.
//
// @param new_memory_used The total number of bytes of memory now used
// The most recent new_memory_used passed in will be returned
// by GetTotalMemoryUsed() even when the call returns a non-ok status.
//
// Since the class is NOT thread-safe, external synchronization on the
// order of calls to UpdateCacheReservation() is needed if you want
// GetTotalMemoryUsed() to indeed return the latest memory used.
//
// @return On inserting dummy entries, it returns Status::OK() if all dummy
// entry insertions succeed.
// Otherwise, it returns the first non-ok status;
// On releasing dummy entries, it always returns Status::OK().
// On keeping dummy entries the same, it always returns Status::OK().
Status UpdateCacheReservation(std::size_t new_memory_used) override;
// One of the two ways of reserving cache space; releasing is done through
// destruction of the returned CacheReservationHandle.
// See UpdateCacheReservation() for the other way.
//
// Use ONLY one of these two ways to prevent unexpected behavior.
//
// Insert dummy entries in the cache for the incremental memory usage
// to match the size of total dummy entries with the least multiple of
// kSizeDummyEntry greater than or equal to the total memory used.
//
// A CacheReservationHandle is returned as an output parameter.
// The reserved dummy entries are automatically released on the destruction of
// this handle, which achieves better RAII per cache reservation.
//
// WARNING: Deallocate all the handles of the CacheReservationManager object
// before deallocating the object to prevent unexpected behavior.
//
// @param incremental_memory_used The number of bytes increased in memory
// usage.
//
// Calling GetTotalMemoryUsed() afterward will return the total memory
// increased by this number, even when calling MakeCacheReservation()
// returns non-ok status.
//
// Since the class is NOT thread-safe, external synchronization in
// calling MakeCacheReservation() is needed if you want
// GetTotalMemoryUsed() to indeed return the latest memory used.
//
// @param handle A pointer to a std::unique_ptr<CacheReservationHandle> that
// manages the lifetime of the cache reservation represented by the
// handle.
//
// @return It returns Status::OK() if all dummy
// entry insertions succeed.
// Otherwise, it returns the first non-ok status;
//
// REQUIRES: handle != nullptr
Status MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
override;
// Return the size of the cache (which is a multiple of kSizeDummyEntry)
// successfully reserved by calling UpdateCacheReservation().
//
// When UpdateCacheReservation() returns a non-ok status, a subsequent call
// to GetTotalReservedCacheSize() might return a slightly smaller number
// than the actual reserved cache size, because the returned number is
// always a multiple of kSizeDummyEntry and the cache may fill up in the
// middle of inserting a dummy entry.
std::size_t GetTotalReservedCacheSize() override;
// Return the latest total memory used indicated by the most recent call of
// UpdateCacheReservation(std::size_t new_memory_used);
std::size_t GetTotalMemoryUsed() override;
static constexpr std::size_t GetDummyEntrySize() { return kSizeDummyEntry; }
// For testing only - it is to help ensure the NoopDeleterForRole<R>
// accessed from CacheReservationManagerImpl and the one accessed from the
// test are from the same translation unit
static Cache::DeleterFn TEST_GetNoopDeleterForRole();
private:
static constexpr std::size_t kSizeDummyEntry = 256 * 1024;
Slice GetNextCacheKey();
Status ReleaseCacheReservation(std::size_t incremental_memory_used);
Status IncreaseCacheReservation(std::size_t new_mem_used);
Status DecreaseCacheReservation(std::size_t new_mem_used);
std::shared_ptr<Cache> cache_;
bool delayed_decrease_;
std::atomic<std::size_t> cache_allocated_size_;
std::size_t memory_used_;
std::vector<Cache::Handle *> dummy_handles_;
CacheKey cache_key_;
};
class ConcurrentCacheReservationManager
: public CacheReservationManager,
public std::enable_shared_from_this<ConcurrentCacheReservationManager> {
public:
class CacheReservationHandle
: public CacheReservationManager::CacheReservationHandle {
public:
CacheReservationHandle(
std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr,
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
cache_res_handle) {
assert(cache_res_mgr && cache_res_handle);
cache_res_mgr_ = cache_res_mgr;
cache_res_handle_ = std::move(cache_res_handle);
}
~CacheReservationHandle() override {
std::lock_guard<std::mutex> lock(cache_res_mgr_->cache_res_mgr_mu_);
cache_res_handle_.reset();
}
private:
std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr_;
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
cache_res_handle_;
};
explicit ConcurrentCacheReservationManager(
std::shared_ptr<CacheReservationManager> cache_res_mgr) {
cache_res_mgr_ = std::move(cache_res_mgr);
}
ConcurrentCacheReservationManager(const ConcurrentCacheReservationManager &) =
delete;
ConcurrentCacheReservationManager &operator=(
const ConcurrentCacheReservationManager &) = delete;
ConcurrentCacheReservationManager(ConcurrentCacheReservationManager &&) =
delete;
ConcurrentCacheReservationManager &operator=(
ConcurrentCacheReservationManager &&) = delete;
~ConcurrentCacheReservationManager() override {}
inline Status UpdateCacheReservation(std::size_t new_memory_used) override {
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
return cache_res_mgr_->UpdateCacheReservation(new_memory_used);
}
inline Status MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
override {
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
wrapped_handle;
Status s;
{
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
s = cache_res_mgr_->MakeCacheReservation(incremental_memory_used,
&wrapped_handle);
}
(*handle).reset(
new ConcurrentCacheReservationManager::CacheReservationHandle(
std::enable_shared_from_this<
ConcurrentCacheReservationManager>::shared_from_this(),
std::move(wrapped_handle)));
return s;
}
inline std::size_t GetTotalReservedCacheSize() override {
return cache_res_mgr_->GetTotalReservedCacheSize();
}
inline std::size_t GetTotalMemoryUsed() override {
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
return cache_res_mgr_->GetTotalMemoryUsed();
}
private:
std::mutex cache_res_mgr_mu_;
std::shared_ptr<CacheReservationManager> cache_res_mgr_;
};
} // namespace ROCKSDB_NAMESPACE
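A minimal end-to-end sketch of the reservation API above (capacity and sizes
are illustrative; a multi-threaded caller would wrap the manager in
ConcurrentCacheReservationManager instead):

#include <cassert>
#include <memory>

#include "cache/cache_reservation_manager.h"
#include "rocksdb/cache.h"

using namespace ROCKSDB_NAMESPACE;

void ReservationExample() {
  std::shared_ptr<Cache> cache = NewLRUCache(64 << 20);  // 64MiB cache
  auto mgr =
      std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
          cache);
  std::unique_ptr<CacheReservationManager::CacheReservationHandle> handle;
  // 100KiB of tracked memory rounds up to one 256KiB dummy entry.
  Status s = mgr->MakeCacheReservation(100 << 10, &handle);
  assert(s.ok());
  assert(mgr->GetTotalReservedCacheSize() ==
         CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize());
  handle.reset();  // RAII: destroying the handle releases the reservation
}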

468
cache/cache_reservation_manager_test.cc vendored
View File

@ -1,468 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/cache_reservation_manager.h"
#include <cstddef>
#include <cstring>
#include <memory>
#include "cache/cache_entry_roles.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "test_util/testharness.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
class CacheReservationManagerTest : public ::testing::Test {
protected:
static constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
static constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
static constexpr int kNumShardBits = 0; // 2^0 shard
static constexpr std::size_t kMetaDataChargeOverhead = 10000;
std::shared_ptr<Cache> cache = NewLRUCache(kCacheCapacity, kNumShardBits);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng;
CacheReservationManagerTest() {
test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache);
}
};
TEST_F(CacheReservationManagerTest, GenerateCacheKey) {
std::size_t new_mem_used = 1 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead);
// Next unique Cache key
CacheKey ckey = CacheKey::CreateUniqueForCacheLifetime(cache.get());
// Get to the underlying values
using PairU64 = std::array<uint64_t, 2>;
auto& ckey_pair = *reinterpret_cast<PairU64*>(&ckey);
// Back it up to the one used by CRM (using CacheKey implementation details)
ckey_pair[1]--;
// Specific key (subject to implementation details)
EXPECT_EQ(ckey_pair, PairU64({0, 2}));
Cache::Handle* handle = cache->Lookup(ckey.AsSlice());
EXPECT_NE(handle, nullptr)
<< "Failed to generate the cache key for the dummy entry correctly";
// Clean up the returned handle from Lookup() to prevent memory leak
cache->Release(handle);
}
TEST_F(CacheReservationManagerTest, KeepCacheReservationTheSame) {
std::size_t new_mem_used = 1 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
std::size_t initial_pinned_usage = cache->GetPinnedUsage();
ASSERT_GE(initial_pinned_usage, 1 * kSizeDummyEntry);
ASSERT_LT(initial_pinned_usage,
1 * kSizeDummyEntry + kMetaDataChargeOverhead);
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to keep cache reservation the same when new_mem_used equals "
"to current cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when new_mem_used equals to current "
"cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly when new_mem_used "
"equals to current cache reservation";
EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
<< "Failed to keep underlying dummy entries the same when new_mem_used "
"equals to current cache reservation";
}
TEST_F(CacheReservationManagerTest,
IncreaseCacheReservationByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to increase cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation increase correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry)
<< "Failed to increase underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to increase underlying dummy entries in cache correctly";
}
TEST_F(CacheReservationManagerTest,
IncreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry + kSizeDummyEntry / 2;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to increase cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
3 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation increase correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 3 * kSizeDummyEntry)
<< "Failed to increase underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
3 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to increase underlying dummy entries in cache correctly";
}
TEST(CacheReservationManagerIncreaseReservationOnFullCacheTest,
IncreaseCacheReservationOnFullCache) {
constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
constexpr std::size_t kSmallCacheCapacity = 4 * kSizeDummyEntry;
constexpr std::size_t kBigCacheCapacity = 4096 * kSizeDummyEntry;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kSmallCacheCapacity;
lo.num_shard_bits = 0; // 2^0 shard
lo.strict_capacity_limit = true;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache);
std::size_t new_mem_used = kSmallCacheCapacity + 1;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::Incomplete())
<< "Failed to return status to indicate failure of dummy entry insertion "
"during cache reservation on full cache";
EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep correctly before cache resevation failure happens "
"due to full cache";
EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(),
kSmallCacheCapacity)
<< "Failed to bookkeep correctly (i.e, bookkeep only successful dummy "
"entry insertions) when encountering cache resevation failure due to "
"full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
new_mem_used = kSmallCacheCapacity / 2; // 2 dummy entries
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation after encountering cache "
"reservation failure due to full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation decrease correctly after "
"encountering cache reservation due to full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry)
<< "Failed to release underlying dummy entries correctly on cache "
"reservation decrease after encountering cache resevation failure due "
"to full cache";
EXPECT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to release underlying dummy entries correctly on cache "
"reservation decrease after encountering cache resevation failure due "
"to full cache";
// Make the cache full again for subsequent tests
new_mem_used = kSmallCacheCapacity + 1;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::Incomplete())
<< "Failed to return status to indicate failure of dummy entry insertion "
"during cache reservation on full cache";
EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep correctly before cache resevation failure happens "
"due to full cache";
EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(),
kSmallCacheCapacity)
<< "Failed to bookkeep correctly (i.e, bookkeep only successful dummy "
"entry insertions) when encountering cache resevation failure due to "
"full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
// Increase cache capacity so the previously failed insertion can fully
// succeed
cache->SetCapacity(kBigCacheCapacity);
new_mem_used = kSmallCacheCapacity + 1;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to increase cache reservation after increasing cache capacity "
"and mitigating cache full error";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
5 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation increase correctly after "
"increasing cache capacity and mitigating cache full error";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 5 * kSizeDummyEntry)
<< "Failed to insert underlying dummy entries correctly after increasing "
"cache capacity and mitigating cache full error";
EXPECT_LT(cache->GetPinnedUsage(),
5 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to insert underlying dummy entries correctly after increasing "
"cache capacity and mitigating cache full error";
}
TEST_F(CacheReservationManagerTest,
DecreaseCacheReservationByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
ASSERT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead);
new_mem_used = 1 * kSizeDummyEntry;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation decrease correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to decrease underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to decrease underlying dummy entries in cache correctly";
}
TEST_F(CacheReservationManagerTest,
DecreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
ASSERT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead);
new_mem_used = kSizeDummyEntry / 2;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation decrease correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to decrease underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to decrease underlying dummy entries in cache correctly";
}
TEST(CacheReservationManagerWithDelayedDecreaseTest,
DecreaseCacheReservationWithDelayedDecrease) {
constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kCacheCapacity;
lo.num_shard_bits = 0;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache, true /* delayed_decrease */);
std::size_t new_mem_used = 8 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
8 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
std::size_t initial_pinned_usage = cache->GetPinnedUsage();
ASSERT_GE(initial_pinned_usage, 8 * kSizeDummyEntry);
ASSERT_LT(initial_pinned_usage,
8 * kSizeDummyEntry + kMetaDataChargeOverhead);
new_mem_used = 6 * kSizeDummyEntry;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK()) << "Failed to delay decreasing cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
8 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when delaying cache reservation "
"decrease";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
<< "Failed to delay decreasing underlying dummy entries in cache";
new_mem_used = 7 * kSizeDummyEntry;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK()) << "Failed to delay decreasing cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
8 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when delaying cache reservation "
"decrease";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
<< "Failed to delay decreasing underlying dummy entries in cache";
new_mem_used = 6 * kSizeDummyEntry - 1;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation correctly when new_mem_used < "
"GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
6 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when new_mem_used < "
"GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 6 * kSizeDummyEntry)
<< "Failed to decrease underlying dummy entries in cache when "
"new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
"decrease mode";
EXPECT_LT(cache->GetPinnedUsage(),
6 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to decrease underlying dummy entries in cache when "
"new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
"decrease mode";
}
TEST(CacheReservationManagerDestructorTest,
ReleaseRemainingDummyEntriesOnDestruction) {
constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kCacheCapacity;
lo.num_shard_bits = 0;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
{
std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache);
std::size_t new_mem_used = 1 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead);
}
EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry)
<< "Failed to release remaining underlying dummy entries in cache in "
"CacheReservationManager's destructor";
}
TEST(CacheReservationHandleTest, HandleTest) {
constexpr std::size_t kOneGigabyte = 1024 * 1024 * 1024;
constexpr std::size_t kSizeDummyEntry = 256 * 1024;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kOneGigabyte;
lo.num_shard_bits = 0;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng(
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache));
std::size_t mem_used = 0;
const std::size_t incremental_mem_used_handle_1 = 1 * kSizeDummyEntry;
const std::size_t incremental_mem_used_handle_2 = 2 * kSizeDummyEntry;
std::unique_ptr<CacheReservationManager::CacheReservationHandle> handle_1,
handle_2;
// Test that consecutive CacheReservationManager::MakeCacheReservation calls
// work correctly in terms of returning the handle as well as updating the
// cache reservation and the latest total memory used
Status s = test_cache_rev_mng->MakeCacheReservation(
incremental_mem_used_handle_1, &handle_1);
mem_used = mem_used + incremental_mem_used_handle_1;
ASSERT_EQ(s, Status::OK());
EXPECT_TRUE(handle_1 != nullptr);
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
s = test_cache_rev_mng->MakeCacheReservation(incremental_mem_used_handle_2,
&handle_2);
mem_used = mem_used + incremental_mem_used_handle_2;
ASSERT_EQ(s, Status::OK());
EXPECT_TRUE(handle_2 != nullptr);
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
// Test that
// CacheReservationManager::CacheReservationHandle::~CacheReservationHandle()
// works correctly in releasing the cache reserved for the handle
handle_1.reset();
EXPECT_TRUE(handle_1 == nullptr);
mem_used = mem_used - incremental_mem_used_handle_1;
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
// Test that the actual CacheReservationManager object won't be deallocated
// as long as there remain handles pointing to it.
// We strongly recommend deallocating a CacheReservationManager object only
// after all its handles are deallocated, to keep things easy to reason about
test_cache_rev_mng.reset();
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
handle_2.reset();
// The CacheReservationManager object is now deallocated since all the handles
// and its original pointer are gone
mem_used = mem_used - incremental_mem_used_handle_2;
EXPECT_EQ(mem_used, 0);
EXPECT_EQ(cache->GetPinnedUsage(), mem_used);
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

165
cache/cache_test.cc vendored
View File

@ -14,9 +14,7 @@
#include <iostream>
#include <string>
#include <vector>
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "cache/lru_cache.h"
#include "test_util/testharness.h"
#include "util/coding.h"
@ -41,7 +39,6 @@ static int DecodeValue(void* v) {
const std::string kLRU = "lru";
const std::string kClock = "clock";
const std::string kFast = "fast";
void dumbDeleter(const Slice& /*key*/, void* /*value*/) {}
@ -86,9 +83,6 @@ class CacheTest : public testing::TestWithParam<std::string> {
if (type == kClock) {
return NewClockCache(capacity);
}
if (type == kFast) {
return NewFastLRUCache(capacity);
}
return nullptr;
}
@ -109,10 +103,6 @@ class CacheTest : public testing::TestWithParam<std::string> {
return NewClockCache(capacity, num_shard_bits, strict_capacity_limit,
charge_policy);
}
if (type == kFast) {
return NewFastLRUCache(capacity, num_shard_bits, strict_capacity_limit,
charge_policy);
}
return nullptr;
}
@ -127,8 +117,8 @@ class CacheTest : public testing::TestWithParam<std::string> {
void Insert(std::shared_ptr<Cache> cache, int key, int value,
int charge = 1) {
EXPECT_OK(cache->Insert(EncodeKey(key), EncodeValue(value), charge,
&CacheTest::Deleter));
cache->Insert(EncodeKey(key), EncodeValue(value), charge,
&CacheTest::Deleter);
}
void Erase(std::shared_ptr<Cache> cache, int key) {
@ -177,10 +167,9 @@ TEST_P(CacheTest, UsageTest) {
for (int i = 1; i < 100; ++i) {
std::string key(i, 'a');
auto kv_size = key.size() + 5;
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
dumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
kv_size, dumbDeleter));
cache->Insert(key, reinterpret_cast<void*>(value), kv_size, dumbDeleter);
precise_cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
dumbDeleter);
usage += kv_size;
ASSERT_EQ(usage, cache->GetUsage());
ASSERT_LT(usage, precise_cache->GetUsage());
@ -193,11 +182,11 @@ TEST_P(CacheTest, UsageTest) {
// make sure the cache will be overloaded
for (uint64_t i = 1; i < kCapacity; ++i) {
auto key = std::to_string(i);
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
key.size() + 5, dumbDeleter));
auto key = ToString(i);
cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter);
precise_cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter);
}
// the usage should be close to the capacity
@ -226,12 +215,11 @@ TEST_P(CacheTest, PinnedUsageTest) {
auto kv_size = key.size() + 5;
Cache::Handle* handle;
Cache::Handle* handle_in_precise_cache;
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
dumbDeleter, &handle));
cache->Insert(key, reinterpret_cast<void*>(value), kv_size, dumbDeleter,
&handle);
assert(handle);
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
kv_size, dumbDeleter,
&handle_in_precise_cache));
precise_cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
dumbDeleter, &handle_in_precise_cache);
assert(handle_in_precise_cache);
pinned_usage += kv_size;
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
@ -265,11 +253,11 @@ TEST_P(CacheTest, PinnedUsageTest) {
// check that overloading the cache does not change the pinned usage
for (uint64_t i = 1; i < 2 * kCapacity; ++i) {
auto key = std::to_string(i);
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
key.size() + 5, dumbDeleter));
auto key = ToString(i);
cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter);
precise_cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter);
}
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage());
@ -585,7 +573,7 @@ TEST_P(CacheTest, SetCapacity) {
std::vector<Cache::Handle*> handles(10);
// Insert 5 entries, but not releasing.
for (size_t i = 0; i < 5; i++) {
std::string key = std::to_string(i + 1);
std::string key = ToString(i+1);
Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_TRUE(s.ok());
}
@ -600,7 +588,7 @@ TEST_P(CacheTest, SetCapacity) {
// then decrease capacity to 7, final capacity should be 7
// and usage should be 7
for (size_t i = 5; i < 10; i++) {
std::string key = std::to_string(i + 1);
std::string key = ToString(i+1);
Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_TRUE(s.ok());
}
@ -619,9 +607,6 @@ TEST_P(CacheTest, SetCapacity) {
for (size_t i = 5; i < 10; i++) {
cache->Release(handles[i]);
}
// Make sure this doesn't crash or upset ASAN/valgrind
cache->DisownData();
}
TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
@ -631,7 +616,7 @@ TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
std::vector<Cache::Handle*> handles(10);
Status s;
for (size_t i = 0; i < 10; i++) {
std::string key = std::to_string(i + 1);
std::string key = ToString(i + 1);
s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_OK(s);
ASSERT_NE(nullptr, handles[i]);
@ -655,7 +640,7 @@ TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
// test3: init with flag being true.
std::shared_ptr<Cache> cache2 = NewCache(5, 0, true);
for (size_t i = 0; i < 5; i++) {
std::string key = std::to_string(i + 1);
std::string key = ToString(i + 1);
s = cache2->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_OK(s);
ASSERT_NE(nullptr, handles[i]);
@ -685,14 +670,14 @@ TEST_P(CacheTest, OverCapacity) {
// Insert n+1 entries, but not releasing.
for (size_t i = 0; i < n + 1; i++) {
std::string key = std::to_string(i + 1);
std::string key = ToString(i+1);
Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_TRUE(s.ok());
}
// Guess what's in the cache now?
for (size_t i = 0; i < n + 1; i++) {
std::string key = std::to_string(i + 1);
std::string key = ToString(i+1);
auto h = cache->Lookup(key);
ASSERT_TRUE(h != nullptr);
if (h) cache->Release(h);
@@ -713,7 +698,7 @@ TEST_P(CacheTest, OverCapacity) {
// This is consistent with the LRU policy since the element 0
// was released first
for (size_t i = 0; i < n + 1; i++) {
std::string key = std::to_string(i + 1);
std::string key = ToString(i+1);
auto h = cache->Lookup(key);
if (h) {
ASSERT_NE(i, 0U);
@@ -725,98 +710,25 @@ TEST_P(CacheTest, OverCapacity) {
}
namespace {
std::vector<std::pair<int, int>> legacy_callback_state;
void legacy_callback(void* value, size_t charge) {
legacy_callback_state.push_back(
{DecodeValue(value), static_cast<int>(charge)});
std::vector<std::pair<int, int>> callback_state;
void callback(void* entry, size_t charge) {
callback_state.push_back({DecodeValue(entry), static_cast<int>(charge)});
}
};
TEST_P(CacheTest, ApplyToAllCacheEntriesTest) {
TEST_P(CacheTest, ApplyToAllCacheEntiresTest) {
std::vector<std::pair<int, int>> inserted;
legacy_callback_state.clear();
callback_state.clear();
for (int i = 0; i < 10; ++i) {
Insert(i, i * 2, i + 1);
inserted.push_back({i * 2, i + 1});
}
cache_->ApplyToAllCacheEntries(legacy_callback, true);
std::sort(inserted.begin(), inserted.end());
std::sort(legacy_callback_state.begin(), legacy_callback_state.end());
ASSERT_EQ(inserted.size(), legacy_callback_state.size());
for (size_t i = 0; i < inserted.size(); ++i) {
EXPECT_EQ(inserted[i], legacy_callback_state[i]);
}
}
TEST_P(CacheTest, ApplyToAllEntriesTest) {
std::vector<std::string> callback_state;
const auto callback = [&](const Slice& key, void* value, size_t charge,
Cache::DeleterFn deleter) {
callback_state.push_back(std::to_string(DecodeKey(key)) + "," +
std::to_string(DecodeValue(value)) + "," +
std::to_string(charge));
assert(deleter == &CacheTest::Deleter);
};
std::vector<std::string> inserted;
callback_state.clear();
for (int i = 0; i < 10; ++i) {
Insert(i, i * 2, i + 1);
inserted.push_back(std::to_string(i) + "," + std::to_string(i * 2) + "," +
std::to_string(i + 1));
}
cache_->ApplyToAllEntries(callback, /*opts*/ {});
cache_->ApplyToAllCacheEntries(callback, true);
std::sort(inserted.begin(), inserted.end());
std::sort(callback_state.begin(), callback_state.end());
ASSERT_EQ(inserted.size(), callback_state.size());
for (size_t i = 0; i < inserted.size(); ++i) {
EXPECT_EQ(inserted[i], callback_state[i]);
}
}
TEST_P(CacheTest, ApplyToAllEntriesDuringResize) {
// This is a mini-stress test of ApplyToAllEntries, to ensure
// items in the cache that are neither added nor removed
// during ApplyToAllEntries are counted exactly once.
// Insert some entries that we expect to be seen exactly once
// during iteration.
constexpr int kSpecialCharge = 2;
constexpr int kNotSpecialCharge = 1;
constexpr int kSpecialCount = 100;
for (int i = 0; i < kSpecialCount; ++i) {
Insert(i, i * 2, kSpecialCharge);
}
// For callback
int special_count = 0;
const auto callback = [&](const Slice&, void*, size_t charge,
Cache::DeleterFn) {
if (charge == static_cast<size_t>(kSpecialCharge)) {
++special_count;
}
};
// Start counting
std::thread apply_thread([&]() {
// Use small average_entries_per_lock to make the problem difficult
Cache::ApplyToAllEntriesOptions opts;
opts.average_entries_per_lock = 2;
cache_->ApplyToAllEntries(callback, opts);
});
// In parallel, add more entries, enough to cause resize but not enough
// to cause ejections
for (int i = kSpecialCount * 1; i < kSpecialCount * 6; ++i) {
Insert(i, i * 2, kNotSpecialCharge);
}
apply_thread.join();
ASSERT_EQ(special_count, kSpecialCount);
ASSERT_TRUE(inserted == callback_state);
}
TEST_P(CacheTest, DefaultShardBits) {
@@ -835,12 +747,11 @@ TEST_P(CacheTest, DefaultShardBits) {
ASSERT_EQ(6, sc->GetNumShardBits());
}
TEST_P(CacheTest, GetChargeAndDeleter) {
TEST_P(CacheTest, GetCharge) {
Insert(1, 2);
Cache::Handle* h1 = cache_->Lookup(EncodeKey(1));
ASSERT_EQ(2, DecodeValue(cache_->Value(h1)));
ASSERT_EQ(1, cache_->GetCharge(h1));
ASSERT_EQ(&CacheTest::Deleter, cache_->GetDeleter(h1));
cache_->Release(h1);
}
@@ -848,13 +759,11 @@ TEST_P(CacheTest, GetChargeAndDeleter) {
std::shared_ptr<Cache> (*new_clock_cache_func)(
size_t, int, bool, CacheMetadataChargePolicy) = NewClockCache;
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
testing::Values(kLRU, kClock, kFast));
testing::Values(kLRU, kClock));
#else
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
testing::Values(kLRU, kFast));
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, testing::Values(kLRU));
#endif // SUPPORT_CLOCK_CACHE
INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest,
testing::Values(kLRU, kFast));
INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest, testing::Values(kLRU));
} // namespace ROCKSDB_NAMESPACE
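
The ApplyToAllEntries API exercised above (main side) hands the callback the key, value, charge, and deleter, replacing the bare (value, charge) callback of the older ApplyToAllCacheEntries. A minimal caller-side sketch, assuming only the signatures visible in these hunks (the dump format and tuning value are illustrative, not from this diff):

#include <cstdio>
#include "rocksdb/cache.h"

using ROCKSDB_NAMESPACE::Cache;
using ROCKSDB_NAMESPACE::Slice;

// Walk every entry in a cache and report its size footprint.
void DumpCacheContents(Cache* cache) {
  Cache::ApplyToAllEntriesOptions opts;
  opts.average_entries_per_lock = 256;  // smaller => shorter lock hold times
  cache->ApplyToAllEntries(
      [](const Slice& key, void* /*value*/, size_t charge,
         Cache::DeleterFn /*deleter*/) {
        std::fprintf(stderr, "key bytes=%zu charge=%zu\n", key.size(), charge);
      },
      opts);
}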

cache/clock_cache.cc

@@ -33,11 +33,11 @@ std::shared_ptr<Cache> NewClockCache(
#ifndef ROCKSDB_USE_RTTI
#define TBB_USE_EXCEPTIONS 0
#endif
#include "tbb/concurrent_hash_map.h"
#include "cache/sharded_cache.h"
#include "port/lang.h"
#include "port/malloc.h"
#include "port/port.h"
#include "tbb/concurrent_hash_map.h"
#include "util/autovector.h"
#include "util/mutexlock.h"
@@ -176,13 +176,10 @@ namespace {
// Cache entry meta data.
struct CacheHandle {
Slice key;
uint32_t hash;
void* value;
size_t charge;
Cache::DeleterFn deleter;
uint32_t hash;
// Addition to "charge" to get "total charge" under metadata policy.
uint32_t meta_charge;
void (*deleter)(const Slice&, void* value);
// Flags and counters associated with the cache handle:
// lowest bit: in-cache bit
@@ -208,8 +205,9 @@ struct CacheHandle {
return *this;
}
inline static uint32_t CalcMetadataCharge(
Slice key, CacheMetadataChargePolicy metadata_charge_policy) {
inline static size_t CalcTotalCharge(
Slice key, size_t charge,
CacheMetadataChargePolicy metadata_charge_policy) {
size_t meta_charge = 0;
if (metadata_charge_policy == kFullChargeCacheMetadata) {
meta_charge += sizeof(CacheHandle);
@@ -220,30 +218,32 @@ struct CacheHandle {
meta_charge += key.size();
#endif
}
assert(meta_charge <= UINT32_MAX);
return static_cast<uint32_t>(meta_charge);
return charge + meta_charge;
}
inline size_t GetTotalCharge() { return charge + meta_charge; }
inline size_t CalcTotalCharge(
CacheMetadataChargePolicy metadata_charge_policy) {
return CalcTotalCharge(key, charge, metadata_charge_policy);
}
};
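
Splitting out meta_charge (main side) means the metadata cost is computed once at insertion, and GetTotalCharge() is a plain addition rather than a re-derivation from the policy on every call. A worked example of the arithmetic, assuming kFullChargeCacheMetadata and a build without malloc_usable_size (numbers are illustrative):

// For a 16-byte key and a user-supplied charge of 100 bytes:
//   meta_charge = sizeof(CacheHandle) + 16;  // struct size + key bytes
//   total       = 100 + meta_charge;         // GetTotalCharge() result
// Under kDontChargeCacheMetadata, meta_charge is 0 and total == 100.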
// Key of hash map. We store hash value with the key for convenience.
struct ClockCacheKey {
struct CacheKey {
Slice key;
uint32_t hash_value;
ClockCacheKey() = default;
CacheKey() = default;
ClockCacheKey(const Slice& k, uint32_t h) {
CacheKey(const Slice& k, uint32_t h) {
key = k;
hash_value = h;
}
static bool equal(const ClockCacheKey& a, const ClockCacheKey& b) {
static bool equal(const CacheKey& a, const CacheKey& b) {
return a.hash_value == b.hash_value && a.key == b.key;
}
static size_t hash(const ClockCacheKey& a) {
static size_t hash(const CacheKey& a) {
return static_cast<size_t>(a.hash_value);
}
};
@@ -260,8 +260,7 @@ struct CleanupContext {
class ClockCacheShard final : public CacheShard {
public:
// Hash map type.
using HashTable =
tbb::concurrent_hash_map<ClockCacheKey, CacheHandle*, ClockCacheKey>;
typedef tbb::concurrent_hash_map<CacheKey, CacheHandle*, CacheKey> HashTable;
ClockCacheShard();
~ClockCacheShard() override;
@@ -272,42 +271,21 @@ class ClockCacheShard final : public CacheShard {
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
void (*deleter)(const Slice& key, void* value),
Cache::Handle** handle, Cache::Priority priority) override;
Status Insert(const Slice& key, uint32_t hash, void* value,
const Cache::CacheItemHelper* helper, size_t charge,
Cache::Handle** handle, Cache::Priority priority) override {
return Insert(key, hash, value, charge, helper->del_cb, handle, priority);
}
Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
Cache::Handle* Lookup(const Slice& key, uint32_t hash,
const Cache::CacheItemHelper* /*helper*/,
const Cache::CreateCallback& /*create_cb*/,
Cache::Priority /*priority*/, bool /*wait*/,
Statistics* /*stats*/) override {
return Lookup(key, hash);
}
bool Release(Cache::Handle* handle, bool /*useful*/,
bool erase_if_last_ref) override {
return Release(handle, erase_if_last_ref);
}
bool IsReady(Cache::Handle* /*handle*/) override { return true; }
void Wait(Cache::Handle* /*handle*/) override {}
// If the entry is in cache, increase reference count and return true.
// Return false otherwise.
//
// Not necessary to hold mutex_ before being called.
bool Ref(Cache::Handle* handle) override;
bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
bool Release(Cache::Handle* handle, bool force_erase = false) override;
void Erase(const Slice& key, uint32_t hash) override;
bool EraseAndConfirm(const Slice& key, uint32_t hash,
CleanupContext* context);
size_t GetUsage() const override;
size_t GetPinnedUsage() const override;
void EraseUnRefEntries() override;
void ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) override;
void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
bool thread_safe) override;
private:
static const uint32_t kInCacheBit = 1;
@@ -426,46 +404,22 @@ size_t ClockCacheShard::GetPinnedUsage() const {
return pinned_usage_.load(std::memory_order_relaxed);
}
void ClockCacheShard::ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) {
assert(average_entries_per_lock > 0);
MutexLock lock(&mutex_);
// Figure out the range to iterate, update `state`
size_t list_size = list_.size();
size_t start_idx = *state;
size_t end_idx = start_idx + average_entries_per_lock;
if (start_idx > list_size) {
// Shouldn't reach here, but recoverable
assert(false);
// Mark finished with all
*state = UINT32_MAX;
return;
void ClockCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
bool thread_safe) {
if (thread_safe) {
mutex_.Lock();
}
if (end_idx >= list_size || end_idx >= UINT32_MAX) {
// This also includes the hypothetical case of >4 billion
// cache handles.
end_idx = list_size;
// Mark finished with all
*state = UINT32_MAX;
} else {
*state = static_cast<uint32_t>(end_idx);
}
// Do the iteration
auto cur = list_.begin() + start_idx;
auto end = list_.begin() + end_idx;
for (; cur != end; ++cur) {
const CacheHandle& handle = *cur;
// Use relaxed semantics instead of acquire semantics since we are
// holding mutex
for (auto& handle : list_) {
// Use relaxed semantics instead of acquire semantics since we are either
// holding mutex, or don't have thread safe requirement.
uint32_t flags = handle.flags.load(std::memory_order_relaxed);
if (InCache(flags)) {
callback(handle.key, handle.value, handle.charge, handle.deleter);
callback(handle.value, handle.charge);
}
}
if (thread_safe) {
mutex_.Unlock();
}
}
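
The uint32_t* state cursor of the main-side ApplyToSomeEntries is meant to be driven in chunks by the sharded-cache layer, with UINT32_MAX signalling completion. A sketch of the presumed driving loop (this loop lives above the shard, not in this file):

// Visit all entries in one shard, taking and dropping the shard mutex
// roughly every average_entries_per_lock entries.
uint32_t state = 0;
while (state != UINT32_MAX) {
  shard->ApplyToSomeEntries(callback, /*average_entries_per_lock=*/256,
                            &state);
}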
void ClockCacheShard::RecycleHandle(CacheHandle* handle,
@@ -474,8 +428,10 @@ void ClockCacheShard::RecycleHandle(CacheHandle* handle,
assert(!InCache(handle->flags) && CountRefs(handle->flags) == 0);
context->to_delete_key.push_back(handle->key.data());
context->to_delete_value.emplace_back(*handle);
size_t total_charge = handle->GetTotalCharge();
// Clearing `handle` fields would go here, but is not strictly required.
size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
handle->key.clear();
handle->value = nullptr;
handle->deleter = nullptr;
recycle_.push_back(handle);
usage_.fetch_sub(total_charge, std::memory_order_relaxed);
}
@@ -503,7 +459,7 @@ bool ClockCacheShard::Ref(Cache::Handle* h) {
std::memory_order_relaxed)) {
if (CountRefs(flags) == 0) {
// No reference count before the operation.
size_t total_charge = handle->GetTotalCharge();
size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
}
return true;
@@ -517,11 +473,6 @@ bool ClockCacheShard::Unref(CacheHandle* handle, bool set_usage,
if (set_usage) {
handle->flags.fetch_or(kUsageBit, std::memory_order_relaxed);
}
// If the handle reaches state refs=0 and InCache=true after this
// atomic operation then we cannot access `handle` afterward, because
// it could be evicted before we access the `handle`.
size_t total_charge = handle->GetTotalCharge();
// Use acquire-release semantics, as previous operations on the cache entry
// have to be ordered before the reference count is decreased, and potential
// cleanup of the entry has to be ordered after.
@@ -529,6 +480,7 @@
assert(CountRefs(flags) > 0);
if (CountRefs(flags) == 1) {
// this is the last reference.
size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
pinned_usage_.fetch_sub(total_charge, std::memory_order_relaxed);
// Cleanup if it is the last reference.
if (!InCache(flags)) {
@@ -560,7 +512,7 @@ bool ClockCacheShard::TryEvict(CacheHandle* handle, CleanupContext* context) {
if (handle->flags.compare_exchange_strong(flags, 0, std::memory_order_acquire,
std::memory_order_relaxed)) {
bool erased __attribute__((__unused__)) =
table_.erase(ClockCacheKey(handle->key, handle->hash));
table_.erase(CacheKey(handle->key, handle->hash));
assert(erased);
RecycleHandle(handle, context);
return true;
@@ -615,9 +567,8 @@ CacheHandle* ClockCacheShard::Insert(
void (*deleter)(const Slice& key, void* value), bool hold_reference,
CleanupContext* context, bool* overwritten) {
assert(overwritten != nullptr && *overwritten == false);
uint32_t meta_charge =
CacheHandle::CalcMetadataCharge(key, metadata_charge_policy_);
size_t total_charge = charge + meta_charge;
size_t total_charge =
CacheHandle::CalcTotalCharge(key, charge, metadata_charge_policy_);
MutexLock l(&mutex_);
bool success = EvictFromCache(total_charge, context);
bool strict = strict_capacity_limit_.load(std::memory_order_relaxed);
@@ -643,27 +594,17 @@ CacheHandle* ClockCacheShard::Insert(
handle->hash = hash;
handle->value = value;
handle->charge = charge;
handle->meta_charge = meta_charge;
handle->deleter = deleter;
uint32_t flags = hold_reference ? kInCacheBit + kOneRef : kInCacheBit;
// TODO investigate+fix suspected race condition:
// [thread 1] Lookup starts, up to Ref()
// [thread 2] Erase/evict the entry just looked up
// [thread 1] Ref() the handle, even though it's in the recycle bin
// [thread 2] Insert with recycling that handle
// Here we obliterate the other thread's Ref
// Possible fix: never blindly overwrite the flags, but only make
// relative updates (fetch_add, etc).
handle->flags.store(flags, std::memory_order_relaxed);
HashTable::accessor accessor;
if (table_.find(accessor, ClockCacheKey(key, hash))) {
if (table_.find(accessor, CacheKey(key, hash))) {
*overwritten = true;
CacheHandle* existing_handle = accessor->second;
table_.erase(accessor);
UnsetInCache(existing_handle, context);
}
table_.insert(HashTable::value_type(ClockCacheKey(key, hash), handle));
table_.insert(HashTable::value_type(CacheKey(key, hash), handle));
if (hold_reference) {
pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
}
@@ -687,7 +628,7 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
Status s;
if (out_handle != nullptr) {
if (handle == nullptr) {
s = Status::Incomplete("Insert failed due to CLOCK cache being full.");
s = Status::Incomplete("Insert failed due to LRU cache being full.");
} else {
*out_handle = reinterpret_cast<Cache::Handle*>(handle);
}
@@ -702,7 +643,7 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
HashTable::const_accessor accessor;
if (!table_.find(accessor, ClockCacheKey(key, hash))) {
if (!table_.find(accessor, CacheKey(key, hash))) {
return nullptr;
}
CacheHandle* handle = accessor->second;
@@ -725,11 +666,11 @@ Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
return reinterpret_cast<Cache::Handle*>(handle);
}
bool ClockCacheShard::Release(Cache::Handle* h, bool erase_if_last_ref) {
bool ClockCacheShard::Release(Cache::Handle* h, bool force_erase) {
CleanupContext context;
CacheHandle* handle = reinterpret_cast<CacheHandle*>(h);
bool erased = Unref(handle, true, &context);
if (erase_if_last_ref && !erased) {
if (force_erase && !erased) {
erased = EraseAndConfirm(handle->key, handle->hash, &context);
}
Cleanup(context);
@@ -747,7 +688,7 @@ bool ClockCacheShard::EraseAndConfirm(const Slice& key, uint32_t hash,
MutexLock l(&mutex_);
HashTable::accessor accessor;
bool erased = false;
if (table_.find(accessor, ClockCacheKey(key, hash))) {
if (table_.find(accessor, CacheKey(key, hash))) {
CacheHandle* handle = accessor->second;
table_.erase(accessor);
erased = UnsetInCache(handle, context);
@@ -785,11 +726,11 @@ class ClockCache final : public ShardedCache {
const char* Name() const override { return "ClockCache"; }
CacheShard* GetShard(uint32_t shard) override {
CacheShard* GetShard(int shard) override {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
const CacheShard* GetShard(uint32_t shard) const override {
const CacheShard* GetShard(int shard) const override {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
@@ -805,18 +746,7 @@ class ClockCache final : public ShardedCache {
return reinterpret_cast<const CacheHandle*>(handle)->hash;
}
DeleterFn GetDeleter(Handle* handle) const override {
return reinterpret_cast<const CacheHandle*>(handle)->deleter;
}
void DisownData() override {
// Leak data only if that won't generate an ASAN/valgrind warning
if (!kMustFreeHeapAllocations) {
shards_ = nullptr;
}
}
void WaitAll(std::vector<Handle*>& /*handles*/) override {}
void DisownData() override { shards_ = nullptr; }
private:
ClockCacheShard* shards_;

cache/compressed_secondary_cache.cc

@@ -1,171 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/compressed_secondary_cache.h"
#include <memory>
#include "memory/memory_allocator.h"
#include "util/compression.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
namespace {
void DeletionCallback(const Slice& /*key*/, void* obj) {
delete reinterpret_cast<CacheAllocationPtr*>(obj);
obj = nullptr;
}
} // namespace
CompressedSecondaryCache::CompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
CompressionType compression_type, uint32_t compress_format_version)
: cache_options_(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator, use_adaptive_mutex,
metadata_charge_policy, compression_type,
compress_format_version) {
cache_ = NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator,
use_adaptive_mutex, metadata_charge_policy);
}
CompressedSecondaryCache::~CompressedSecondaryCache() { cache_.reset(); }
std::unique_ptr<SecondaryCacheResultHandle> CompressedSecondaryCache::Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
bool& is_in_sec_cache) {
std::unique_ptr<SecondaryCacheResultHandle> handle;
is_in_sec_cache = false;
Cache::Handle* lru_handle = cache_->Lookup(key);
if (lru_handle == nullptr) {
return handle;
}
CacheAllocationPtr* ptr =
reinterpret_cast<CacheAllocationPtr*>(cache_->Value(lru_handle));
void* value = nullptr;
size_t charge = 0;
Status s;
if (cache_options_.compression_type == kNoCompression) {
s = create_cb(ptr->get(), cache_->GetCharge(lru_handle), &value, &charge);
} else {
UncompressionContext uncompression_context(cache_options_.compression_type);
UncompressionInfo uncompression_info(uncompression_context,
UncompressionDict::GetEmptyDict(),
cache_options_.compression_type);
size_t uncompressed_size = 0;
CacheAllocationPtr uncompressed;
uncompressed = UncompressData(
uncompression_info, (char*)ptr->get(), cache_->GetCharge(lru_handle),
&uncompressed_size, cache_options_.compress_format_version,
cache_options_.memory_allocator.get());
if (!uncompressed) {
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
return handle;
}
s = create_cb(uncompressed.get(), uncompressed_size, &value, &charge);
}
if (!s.ok()) {
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
return handle;
}
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
handle.reset(new CompressedSecondaryCacheResultHandle(value, charge));
return handle;
}
Status CompressedSecondaryCache::Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) {
size_t size = (*helper->size_cb)(value);
CacheAllocationPtr ptr =
AllocateBlock(size, cache_options_.memory_allocator.get());
Status s = (*helper->saveto_cb)(value, 0, size, ptr.get());
if (!s.ok()) {
return s;
}
Slice val(ptr.get(), size);
std::string compressed_val;
if (cache_options_.compression_type != kNoCompression) {
CompressionOptions compression_opts;
CompressionContext compression_context(cache_options_.compression_type);
uint64_t sample_for_compression = 0;
CompressionInfo compression_info(
compression_opts, compression_context, CompressionDict::GetEmptyDict(),
cache_options_.compression_type, sample_for_compression);
bool success =
CompressData(val, compression_info,
cache_options_.compress_format_version, &compressed_val);
if (!success) {
return Status::Corruption("Error compressing value.");
}
val = Slice(compressed_val);
size = compressed_val.size();
ptr = AllocateBlock(size, cache_options_.memory_allocator.get());
memcpy(ptr.get(), compressed_val.data(), size);
}
CacheAllocationPtr* buf = new CacheAllocationPtr(std::move(ptr));
return cache_->Insert(key, buf, size, DeletionCallback);
}
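
Insert() relies on the CacheItemHelper contract: size_cb reports the value's serialized size, saveto_cb copies the bytes out, and the stored block is then owned by the cache via DeletionCallback. A hypothetical helper for plain std::string values, matching the callback signatures used in the tests later in this diff (none of these names are part of the library):

#include <cstring>
#include <string>

size_t StringSize(void* obj) {
  return static_cast<std::string*>(obj)->size();
}
Status StringSaveTo(void* from_obj, size_t from_offset, size_t length,
                    void* out) {
  const std::string* s = static_cast<const std::string*>(from_obj);
  std::memcpy(out, s->data() + from_offset, length);
  return Status::OK();
}
void StringDelete(const Slice& /*key*/, void* obj) {
  delete static_cast<std::string*>(obj);
}
Cache::CacheItemHelper kStringHelper(StringSize, StringSaveTo, StringDelete);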
void CompressedSecondaryCache::Erase(const Slice& key) { cache_->Erase(key); }
std::string CompressedSecondaryCache::GetPrintableOptions() const {
std::string ret;
ret.reserve(20000);
const int kBufferSize = 200;
char buffer[kBufferSize];
ret.append(cache_->GetPrintableOptions());
snprintf(buffer, kBufferSize, " compression_type : %s\n",
CompressionTypeToString(cache_options_.compression_type).c_str());
ret.append(buffer);
snprintf(buffer, kBufferSize, " compression_type : %d\n",
cache_options_.compress_format_version);
ret.append(buffer);
return ret;
}
std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
CompressionType compression_type, uint32_t compress_format_version) {
return std::make_shared<CompressedSecondaryCache>(
capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
memory_allocator, use_adaptive_mutex, metadata_charge_policy,
compression_type, compress_format_version);
}
std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
const CompressedSecondaryCacheOptions& opts) {
// The secondary_cache is disabled for this LRUCache instance.
assert(opts.secondary_cache == nullptr);
return NewCompressedSecondaryCache(
opts.capacity, opts.num_shard_bits, opts.strict_capacity_limit,
opts.high_pri_pool_ratio, opts.memory_allocator, opts.use_adaptive_mutex,
opts.metadata_charge_policy, opts.compression_type,
opts.compress_format_version);
}
} // namespace ROCKSDB_NAMESPACE
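
On main, the factory above is wired into a primary block cache through LRUCacheOptions::secondary_cache, as the integration tests later in this diff do. A minimal sketch with illustrative capacities:

#include "rocksdb/cache.h"
#include "rocksdb/secondary_cache.h"

using namespace ROCKSDB_NAMESPACE;

std::shared_ptr<Cache> MakeCacheWithCompressedSecondary() {
  CompressedSecondaryCacheOptions sec_opts;
  sec_opts.capacity = 32 << 20;  // 32 MiB compressed tier
  sec_opts.num_shard_bits = 0;
  std::shared_ptr<SecondaryCache> sec_cache =
      NewCompressedSecondaryCache(sec_opts);

  LRUCacheOptions lru_opts(/*capacity=*/8 << 20, /*num_shard_bits=*/0,
                           /*strict_capacity_limit=*/false,
                           /*high_pri_pool_ratio=*/0.5);
  // Blocks evicted from the primary LRU cache are demoted into the
  // compressed tier; hits in that tier are promoted back.
  lru_opts.secondary_cache = sec_cache;
  return NewLRUCache(lru_opts);
}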

cache/compressed_secondary_cache.h

@@ -1,86 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <memory>
#include "cache/lru_cache.h"
#include "memory/memory_allocator.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/compression.h"
namespace ROCKSDB_NAMESPACE {
class CompressedSecondaryCacheResultHandle : public SecondaryCacheResultHandle {
public:
CompressedSecondaryCacheResultHandle(void* value, size_t size)
: value_(value), size_(size) {}
virtual ~CompressedSecondaryCacheResultHandle() override = default;
CompressedSecondaryCacheResultHandle(
const CompressedSecondaryCacheResultHandle&) = delete;
CompressedSecondaryCacheResultHandle& operator=(
const CompressedSecondaryCacheResultHandle&) = delete;
bool IsReady() override { return true; }
void Wait() override {}
void* Value() override { return value_; }
size_t Size() override { return size_; }
private:
void* value_;
size_t size_;
};
// The CompressedSecondaryCache is a concrete implementation of
// rocksdb::SecondaryCache.
//
// Users can also cast a pointer to it and call methods on
// it directly, especially custom methods that may be added
// in the future. For example -
// std::unique_ptr<rocksdb::SecondaryCache> cache =
// NewCompressedSecondaryCache(opts);
// static_cast<CompressedSecondaryCache*>(cache.get())->Erase(key);
class CompressedSecondaryCache : public SecondaryCache {
public:
CompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata,
CompressionType compression_type = CompressionType::kLZ4Compression,
uint32_t compress_format_version = 2);
virtual ~CompressedSecondaryCache() override;
const char* Name() const override { return "CompressedSecondaryCache"; }
Status Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) override;
std::unique_ptr<SecondaryCacheResultHandle> Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
bool& is_in_sec_cache) override;
void Erase(const Slice& key) override;
void WaitAll(std::vector<SecondaryCacheResultHandle*> /*handles*/) override {}
std::string GetPrintableOptions() const override;
private:
std::shared_ptr<Cache> cache_;
CompressedSecondaryCacheOptions cache_options_;
};
} // namespace ROCKSDB_NAMESPACE

cache/compressed_secondary_cache_test.cc

@@ -1,607 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/compressed_secondary_cache.h"
#include <algorithm>
#include <cstdint>
#include "memory/jemalloc_nodump_allocator.h"
#include "memory/memory_allocator.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/compression.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
class CompressedSecondaryCacheTest : public testing::Test {
public:
CompressedSecondaryCacheTest() : fail_create_(false) {}
~CompressedSecondaryCacheTest() {}
protected:
class TestItem {
public:
TestItem(const char* buf, size_t size) : buf_(new char[size]), size_(size) {
memcpy(buf_.get(), buf, size);
}
~TestItem() {}
char* Buf() { return buf_.get(); }
size_t Size() { return size_; }
private:
std::unique_ptr<char[]> buf_;
size_t size_;
};
static size_t SizeCallback(void* obj) {
return reinterpret_cast<TestItem*>(obj)->Size();
}
static Status SaveToCallback(void* from_obj, size_t from_offset,
size_t length, void* out) {
TestItem* item = reinterpret_cast<TestItem*>(from_obj);
const char* buf = item->Buf();
EXPECT_EQ(length, item->Size());
EXPECT_EQ(from_offset, 0);
memcpy(out, buf, length);
return Status::OK();
}
static void DeletionCallback(const Slice& /*key*/, void* obj) {
delete reinterpret_cast<TestItem*>(obj);
obj = nullptr;
}
static Cache::CacheItemHelper helper_;
static Status SaveToCallbackFail(void* /*obj*/, size_t /*offset*/,
size_t /*size*/, void* /*out*/) {
return Status::NotSupported();
}
static Cache::CacheItemHelper helper_fail_;
Cache::CreateCallback test_item_creator = [&](const void* buf, size_t size,
void** out_obj,
size_t* charge) -> Status {
if (fail_create_) {
return Status::NotSupported();
}
*out_obj = reinterpret_cast<void*>(new TestItem((char*)buf, size));
*charge = size;
return Status::OK();
};
void SetFailCreate(bool fail) { fail_create_ = fail; }
void BasicTest(bool sec_cache_is_compressed, bool use_jemalloc) {
CompressedSecondaryCacheOptions opts;
opts.capacity = 2048;
opts.num_shard_bits = 0;
opts.metadata_charge_policy = kDontChargeCacheMetadata;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
opts.compression_type = CompressionType::kNoCompression;
}
} else {
opts.compression_type = CompressionType::kNoCompression;
}
if (use_jemalloc) {
JemallocAllocatorOptions jopts;
std::shared_ptr<MemoryAllocator> allocator;
std::string msg;
if (JemallocNodumpAllocator::IsSupported(&msg)) {
Status s = NewJemallocNodumpAllocator(jopts, &allocator);
if (s.ok()) {
opts.memory_allocator = allocator;
}
} else {
ROCKSDB_GTEST_BYPASS("JEMALLOC not supported");
}
}
std::shared_ptr<SecondaryCache> sec_cache =
NewCompressedSecondaryCache(opts);
bool is_in_sec_cache{true};
// Lookup a non-existent key.
std::unique_ptr<SecondaryCacheResultHandle> handle0 =
sec_cache->Lookup("k0", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle0, nullptr);
Random rnd(301);
// Insert and Lookup the first item.
std::string str1;
test::CompressibleString(&rnd, 0.25, 1000, &str1);
TestItem item1(str1.data(), str1.length());
ASSERT_OK(sec_cache->Insert("k1", &item1,
&CompressedSecondaryCacheTest::helper_));
std::unique_ptr<SecondaryCacheResultHandle> handle1 =
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle1, nullptr);
ASSERT_FALSE(is_in_sec_cache);
std::unique_ptr<TestItem> val1 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle1->Value()));
ASSERT_NE(val1, nullptr);
ASSERT_EQ(memcmp(val1->Buf(), item1.Buf(), item1.Size()), 0);
// Lookup the first item again.
std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle1_1, nullptr);
// Insert and Lookup the second item.
std::string str2;
test::CompressibleString(&rnd, 0.5, 1000, &str2);
TestItem item2(str2.data(), str2.length());
ASSERT_OK(sec_cache->Insert("k2", &item2,
&CompressedSecondaryCacheTest::helper_));
std::unique_ptr<SecondaryCacheResultHandle> handle2 =
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle2, nullptr);
std::unique_ptr<TestItem> val2 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle2->Value()));
ASSERT_NE(val2, nullptr);
ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0);
std::vector<SecondaryCacheResultHandle*> handles = {handle1.get(),
handle2.get()};
sec_cache->WaitAll(handles);
sec_cache.reset();
}
void FailsTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 1100;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> sec_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
// Insert and Lookup the first item.
Random rnd(301);
std::string str1(rnd.RandomString(1000));
TestItem item1(str1.data(), str1.length());
ASSERT_OK(sec_cache->Insert("k1", &item1,
&CompressedSecondaryCacheTest::helper_));
// Insert and Lookup the second item.
std::string str2(rnd.RandomString(200));
TestItem item2(str2.data(), str2.length());
// k1 is evicted.
ASSERT_OK(sec_cache->Insert("k2", &item2,
&CompressedSecondaryCacheTest::helper_));
bool is_in_sec_cache{false};
std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle1_1, nullptr);
std::unique_ptr<SecondaryCacheResultHandle> handle2 =
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle2, nullptr);
std::unique_ptr<TestItem> val2 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle2->Value()));
ASSERT_NE(val2, nullptr);
ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0);
// Create Fails.
SetFailCreate(true);
std::unique_ptr<SecondaryCacheResultHandle> handle2_1 =
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle2_1, nullptr);
// Save Fails.
std::string str3 = rnd.RandomString(10);
TestItem item3(str3.data(), str3.length());
ASSERT_NOK(sec_cache->Insert("k3", &item3,
&CompressedSecondaryCacheTest::helper_fail_));
sec_cache.reset();
}
void BasicIntegrationTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2300;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions lru_cache_opts(1024, 0, false, 0.5, nullptr,
kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
lru_cache_opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(lru_cache_opts);
std::shared_ptr<Statistics> stats = CreateDBStatistics();
Random rnd(301);
std::string str1 = rnd.RandomString(1010);
std::string str1_clone{str1};
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// After Insert, lru cache contains k2 and secondary cache contains k1.
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
std::string str3 = rnd.RandomString(1020);
TestItem* item3 = new TestItem(str3.data(), str3.length());
// After Insert, lru cache contains k3 and secondary cache contains k1 and
// k2
ASSERT_OK(cache->Insert("k3", item3, &CompressedSecondaryCacheTest::helper_,
str3.length()));
Cache::Handle* handle;
handle = cache->Lookup("k3", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
TestItem* val3 = static_cast<TestItem*>(cache->Value(handle));
ASSERT_NE(val3, nullptr);
ASSERT_EQ(memcmp(val3->Buf(), item3->Buf(), item3->Size()), 0);
cache->Release(handle);
// Lookup a non-existent key.
handle = cache->Lookup("k0", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_EQ(handle, nullptr);
// This Lookup should promote k1 and erase k1 from the secondary cache,
// then k3 is demoted. So k2 and k3 are in the secondary cache.
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
TestItem* val1_1 = static_cast<TestItem*>(cache->Value(handle));
ASSERT_NE(val1_1, nullptr);
ASSERT_EQ(memcmp(val1_1->Buf(), str1_clone.data(), str1_clone.size()), 0);
cache->Release(handle);
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void BasicIntegrationFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
auto item1 =
std::unique_ptr<TestItem>(new TestItem(str1.data(), str1.length()));
ASSERT_NOK(cache->Insert("k1", item1.get(), nullptr, str1.length()));
ASSERT_OK(cache->Insert("k1", item1.get(),
&CompressedSecondaryCacheTest::helper_,
str1.length()));
item1.release(); // Appease clang-analyze "potential memory leak"
Cache::Handle* handle;
handle = cache->Lookup("k2", nullptr, test_item_creator,
Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, false);
ASSERT_EQ(handle, nullptr);
cache.reset();
secondary_cache.reset();
}
void IntegrationSaveFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1,
&CompressedSecondaryCacheTest::helper_fail_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2,
&CompressedSecondaryCacheTest::helper_fail_,
str2.length()));
Cache::Handle* handle;
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 demotion would have failed
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void IntegrationCreateFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
Cache::Handle* handle;
SetFailCreate(true);
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 creation would have failed
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void IntegrationFullCapacityTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, /*_strict_capacity_limit=*/true, 0.5, nullptr,
kDefaultToAdaptiveMutex, kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
Cache::Handle* handle2;
handle2 = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle2, nullptr);
cache->Release(handle2);
// k1 promotion should fail due to the block cache being at capacity,
// but the lookup should still succeed
Cache::Handle* handle1;
handle1 = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle1, nullptr);
cache->Release(handle1);
// Since k1 didn't get inserted, k2 should still be in cache
handle2 = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle2, nullptr);
cache->Release(handle2);
cache.reset();
secondary_cache.reset();
}
private:
bool fail_create_;
};
Cache::CacheItemHelper CompressedSecondaryCacheTest::helper_(
CompressedSecondaryCacheTest::SizeCallback,
CompressedSecondaryCacheTest::SaveToCallback,
CompressedSecondaryCacheTest::DeletionCallback);
Cache::CacheItemHelper CompressedSecondaryCacheTest::helper_fail_(
CompressedSecondaryCacheTest::SizeCallback,
CompressedSecondaryCacheTest::SaveToCallbackFail,
CompressedSecondaryCacheTest::DeletionCallback);
TEST_F(CompressedSecondaryCacheTest, BasicTestWithNoCompression) {
BasicTest(false, false);
}
TEST_F(CompressedSecondaryCacheTest,
BasicTestWithMemoryAllocatorAndNoCompression) {
BasicTest(false, true);
}
TEST_F(CompressedSecondaryCacheTest, BasicTestWithCompression) {
BasicTest(true, false);
}
TEST_F(CompressedSecondaryCacheTest,
BasicTestWithMemoryAllocatorAndCompression) {
BasicTest(true, true);
}
TEST_F(CompressedSecondaryCacheTest, FailsTestWithNoCompression) {
FailsTest(false);
}
TEST_F(CompressedSecondaryCacheTest, FailsTestWithCompression) {
FailsTest(true);
}
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationTestWithNoCompression) {
BasicIntegrationTest(false);
}
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationTestWithCompression) {
BasicIntegrationTest(true);
}
TEST_F(CompressedSecondaryCacheTest,
BasicIntegrationFailTestWithNoCompression) {
BasicIntegrationFailTest(false);
}
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationFailTestWithCompression) {
BasicIntegrationFailTest(true);
}
TEST_F(CompressedSecondaryCacheTest, IntegrationSaveFailTestWithNoCompression) {
IntegrationSaveFailTest(false);
}
TEST_F(CompressedSecondaryCacheTest, IntegrationSaveFailTestWithCompression) {
IntegrationSaveFailTest(true);
}
TEST_F(CompressedSecondaryCacheTest,
IntegrationCreateFailTestWithNoCompression) {
IntegrationCreateFailTest(false);
}
TEST_F(CompressedSecondaryCacheTest, IntegrationCreateFailTestWithCompression) {
IntegrationCreateFailTest(true);
}
TEST_F(CompressedSecondaryCacheTest,
IntegrationFullCapacityTestWithNoCompression) {
IntegrationFullCapacityTest(false);
}
TEST_F(CompressedSecondaryCacheTest,
IntegrationFullCapacityTestWithCompression) {
IntegrationFullCapacityTest(true);
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

cache/fast_lru_cache.cc

@@ -1,511 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/fast_lru_cache.h"
#include <cassert>
#include <cstdint>
#include <cstdio>
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "port/lang.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
namespace fast_lru_cache {
LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
: length_bits_(/* historical starting size*/ 4),
list_(new LRUHandle* [size_t{1} << length_bits_] {}),
elems_(0),
max_length_bits_(max_upper_hash_bits) {}
LRUHandleTable::~LRUHandleTable() {
ApplyToEntriesRange(
[](LRUHandle* h) {
if (!h->HasRefs()) {
h->Free();
}
},
0, uint32_t{1} << length_bits_);
}
LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
return *FindPointer(key, hash);
}
LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
LRUHandle** ptr = FindPointer(h->key(), h->hash);
LRUHandle* old = *ptr;
h->next_hash = (old == nullptr ? nullptr : old->next_hash);
*ptr = h;
if (old == nullptr) {
++elems_;
if ((elems_ >> length_bits_) > 0) { // elems_ >= length
// Since each cache entry is fairly large, we aim for a small
// average linked list length (<= 1).
Resize();
}
}
return old;
}
LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
LRUHandle** ptr = FindPointer(key, hash);
LRUHandle* result = *ptr;
if (result != nullptr) {
*ptr = result->next_hash;
--elems_;
}
return result;
}
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
LRUHandle** ptr = &list_[hash >> (32 - length_bits_)];
while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
ptr = &(*ptr)->next_hash;
}
return ptr;
}
void LRUHandleTable::Resize() {
if (length_bits_ >= max_length_bits_) {
// Due to reaching the limit of hash information, if we made the table
// bigger, we would allocate more addresses but only the same number would
// be used.
return;
}
if (length_bits_ >= 31) {
// Avoid undefined behavior shifting uint32_t by 32.
return;
}
uint32_t old_length = uint32_t{1} << length_bits_;
int new_length_bits = length_bits_ + 1;
std::unique_ptr<LRUHandle* []> new_list {
new LRUHandle* [size_t{1} << new_length_bits] {}
};
uint32_t count = 0;
for (uint32_t i = 0; i < old_length; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
LRUHandle* next = h->next_hash;
uint32_t hash = h->hash;
LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)];
h->next_hash = *ptr;
*ptr = h;
h = next;
count++;
}
}
assert(elems_ == count);
list_ = std::move(new_list);
length_bits_ = new_length_bits;
}
LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy,
int max_upper_hash_bits)
: capacity_(0),
strict_capacity_limit_(strict_capacity_limit),
table_(max_upper_hash_bits),
usage_(0),
lru_usage_(0) {
set_metadata_charge_policy(metadata_charge_policy);
// Make empty circular linked list.
lru_.next = &lru_;
lru_.prev = &lru_;
lru_low_pri_ = &lru_;
SetCapacity(capacity);
}
void LRUCacheShard::EraseUnRefEntries() {
autovector<LRUHandle*> last_reference_list;
{
MutexLock l(&mutex_);
while (lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
last_reference_list.push_back(old);
}
}
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
entry->Free();
}
}
void LRUCacheShard::ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) {
// The state is essentially going to be the starting hash, which works
// nicely even if we resize between calls because we use upper-most
// hash bits for table indexes.
MutexLock l(&mutex_);
uint32_t length_bits = table_.GetLengthBits();
uint32_t length = uint32_t{1} << length_bits;
assert(average_entries_per_lock > 0);
// Assuming we are called with same average_entries_per_lock repeatedly,
// this simplifies some logic (index_end will not overflow).
assert(average_entries_per_lock < length || *state == 0);
uint32_t index_begin = *state >> (32 - length_bits);
uint32_t index_end = index_begin + average_entries_per_lock;
if (index_end >= length) {
// Going to end
index_end = length;
*state = UINT32_MAX;
} else {
*state = index_end << (32 - length_bits);
}
table_.ApplyToEntriesRange(
[callback](LRUHandle* h) {
callback(h->key(), h->value, h->charge, h->deleter);
},
index_begin, index_end);
}
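
The resume cursor works because buckets are indexed by the upper hash bits, so a saved *state stays valid even if the table grows between calls. A worked example under that assumption:

// With length_bits = 4, state 0x30000000 resumes at bucket
// 0x30000000 >> (32 - 4) == 3. If the table doubles to length_bits = 5
// between calls, the same state resumes at bucket
// 0x30000000 >> (32 - 5) == 6, and old bucket 3's entries now live in
// exactly buckets 6 and 7, so nothing is skipped or double-counted.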
void LRUCacheShard::LRU_Remove(LRUHandle* e) {
assert(e->next != nullptr);
assert(e->prev != nullptr);
e->next->prev = e->prev;
e->prev->next = e->next;
e->prev = e->next = nullptr;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(lru_usage_ >= total_charge);
lru_usage_ -= total_charge;
}
void LRUCacheShard::LRU_Insert(LRUHandle* e) {
assert(e->next == nullptr);
assert(e->prev == nullptr);
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
// Inset "e" to head of LRU list.
e->next = &lru_;
e->prev = lru_.prev;
e->prev->next = e;
e->next->prev = e;
lru_usage_ += total_charge;
}
void LRUCacheShard::EvictFromLRU(size_t charge,
autovector<LRUHandle*>* deleted) {
while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
deleted->push_back(old);
}
}
void LRUCacheShard::SetCapacity(size_t capacity) {
autovector<LRUHandle*> last_reference_list;
{
MutexLock l(&mutex_);
capacity_ = capacity;
EvictFromLRU(0, &last_reference_list);
}
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
entry->Free();
}
}
void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
MutexLock l(&mutex_);
strict_capacity_limit_ = strict_capacity_limit;
}
Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
bool free_handle_on_fail) {
Status s = Status::OK();
autovector<LRUHandle*> last_reference_list;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
{
MutexLock l(&mutex_);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty.
EvictFromLRU(total_charge, &last_reference_list);
if ((usage_ + total_charge) > capacity_ &&
(strict_capacity_limit_ || handle == nullptr)) {
e->SetInCache(false);
if (handle == nullptr) {
// Don't insert the entry but still return ok, as if the entry were
// inserted into the cache and evicted immediately.
last_reference_list.push_back(e);
} else {
if (free_handle_on_fail) {
delete[] reinterpret_cast<char*>(e);
*handle = nullptr;
}
s = Status::Incomplete("Insert failed due to LRU cache being full.");
}
} else {
// Insert into the cache. Note that the cache might get larger than its
// capacity if not enough space was freed up.
LRUHandle* old = table_.Insert(e);
usage_ += total_charge;
if (old != nullptr) {
s = Status::OkOverwritten();
assert(old->InCache());
old->SetInCache(false);
if (!old->HasRefs()) {
// old is on LRU because it's in cache and its reference count is 0.
LRU_Remove(old);
size_t old_total_charge =
old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
last_reference_list.push_back(old);
}
}
if (handle == nullptr) {
LRU_Insert(e);
} else {
// If caller already holds a ref, no need to take one here.
if (!e->HasRefs()) {
e->Ref();
}
*handle = reinterpret_cast<Cache::Handle*>(e);
}
}
}
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
entry->Free();
}
return s;
}
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
LRUHandle* e = nullptr;
{
MutexLock l(&mutex_);
e = table_.Lookup(key, hash);
if (e != nullptr) {
assert(e->InCache());
if (!e->HasRefs()) {
// The entry is in LRU since it's in hash and has no external references
LRU_Remove(e);
}
e->Ref();
}
}
return reinterpret_cast<Cache::Handle*>(e);
}
bool LRUCacheShard::Ref(Cache::Handle* h) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
MutexLock l(&mutex_);
// To create another reference - entry must be already externally referenced.
assert(e->HasRefs());
e->Ref();
return true;
}
bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
if (handle == nullptr) {
return false;
}
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
bool last_reference = false;
{
MutexLock l(&mutex_);
last_reference = e->Unref();
if (last_reference && e->InCache()) {
// The item is still in cache, and nobody else holds a reference to it.
if (usage_ > capacity_ || erase_if_last_ref) {
// The LRU list must be empty since the cache is full.
assert(lru_.next == &lru_ || erase_if_last_ref);
// Take this opportunity and remove the item.
table_.Remove(e->key(), e->hash);
e->SetInCache(false);
} else {
// Put the item back on the LRU list, and don't free it.
LRU_Insert(e);
last_reference = false;
}
}
// If it was the last reference, then decrement the cache usage.
if (last_reference) {
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
}
}
// Free the entry here outside of mutex for performance reasons.
if (last_reference) {
e->Free();
}
return last_reference;
}
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
size_t charge, Cache::DeleterFn deleter,
Cache::Handle** handle,
Cache::Priority /*priority*/) {
// Allocate the memory here outside of the mutex.
// If the cache is full, we'll have to release it.
// It shouldn't happen very often though.
LRUHandle* e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
e->value = value;
e->flags = 0;
e->deleter = deleter;
e->charge = charge;
e->key_length = key.size();
e->hash = hash;
e->refs = 0;
e->next = e->prev = nullptr;
e->SetInCache(true);
memcpy(e->key_data, key.data(), key.size());
return InsertItem(e, handle, /* free_handle_on_fail */ true);
}
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
LRUHandle* e;
bool last_reference = false;
{
MutexLock l(&mutex_);
e = table_.Remove(key, hash);
if (e != nullptr) {
assert(e->InCache());
e->SetInCache(false);
if (!e->HasRefs()) {
        // The entry is in the LRU list since it's in the hash table and has no external references
LRU_Remove(e);
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
last_reference = true;
}
}
}
// Free the entry here outside of mutex for performance reasons.
// last_reference will only be true if e != nullptr.
if (last_reference) {
e->Free();
}
}
size_t LRUCacheShard::GetUsage() const {
MutexLock l(&mutex_);
return usage_;
}
size_t LRUCacheShard::GetPinnedUsage() const {
MutexLock l(&mutex_);
assert(usage_ >= lru_usage_);
return usage_ - lru_usage_;
}
std::string LRUCacheShard::GetPrintableOptions() const { return std::string{}; }
LRUCache::LRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
num_shards_ = 1 << num_shard_bits;
shards_ = reinterpret_cast<LRUCacheShard*>(
port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
for (int i = 0; i < num_shards_; i++) {
new (&shards_[i])
LRUCacheShard(per_shard, strict_capacity_limit, metadata_charge_policy,
/* max_upper_hash_bits */ 32 - num_shard_bits);
}
}
LRUCache::~LRUCache() {
if (shards_ != nullptr) {
assert(num_shards_ > 0);
for (int i = 0; i < num_shards_; i++) {
shards_[i].~LRUCacheShard();
}
port::cacheline_aligned_free(shards_);
}
}
CacheShard* LRUCache::GetShard(uint32_t shard) {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
const CacheShard* LRUCache::GetShard(uint32_t shard) const {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
void* LRUCache::Value(Handle* handle) {
return reinterpret_cast<const LRUHandle*>(handle)->value;
}
size_t LRUCache::GetCharge(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->charge;
}
Cache::DeleterFn LRUCache::GetDeleter(Handle* handle) const {
auto h = reinterpret_cast<const LRUHandle*>(handle);
return h->deleter;
}
uint32_t LRUCache::GetHash(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->hash;
}
void LRUCache::DisownData() {
// Leak data only if that won't generate an ASAN/valgrind warning.
if (!kMustFreeHeapAllocations) {
shards_ = nullptr;
num_shards_ = 0;
}
}
} // namespace fast_lru_cache
std::shared_ptr<Cache> NewFastLRUCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
if (num_shard_bits >= 20) {
return nullptr; // The cache cannot be sharded into too many fine pieces.
}
if (num_shard_bits < 0) {
num_shard_bits = GetDefaultCacheShardBits(capacity);
}
return std::make_shared<fast_lru_cache::LRUCache>(
capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy);
}
} // namespace ROCKSDB_NAMESPACE
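A minimal usage sketch of the NewFastLRUCache factory defined above; the capacity, shard, and policy arguments are illustrative assumptions, not values from this diff:

std::shared_ptr<Cache> cache = NewFastLRUCache(
    /*capacity=*/64 << 20, /*num_shard_bits=*/-1 /* auto-pick */,
    /*strict_capacity_limit=*/false, kDefaultCacheMetadataChargePolicy);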

cache/fast_lru_cache.h vendored (299 lines changed)

@ -1,299 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <memory>
#include <string>
#include "cache/sharded_cache.h"
#include "port/lang.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/secondary_cache.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
namespace fast_lru_cache {
// An experimental (under development!) alternative to LRUCache
struct LRUHandle {
void* value;
Cache::DeleterFn deleter;
LRUHandle* next_hash;
LRUHandle* next;
LRUHandle* prev;
size_t charge; // TODO(opt): Only allow uint32_t?
size_t key_length;
// The hash of key(). Used for fast sharding and comparisons.
uint32_t hash;
// The number of external refs to this entry. The cache itself is not counted.
uint32_t refs;
enum Flags : uint8_t {
// Whether this entry is referenced by the hash table.
IN_CACHE = (1 << 0),
};
uint8_t flags;
// Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
char key_data[1];
Slice key() const { return Slice(key_data, key_length); }
// Increase the reference count by 1.
void Ref() { refs++; }
// Just reduce the reference count by 1. Return true if it was the last reference.
bool Unref() {
assert(refs > 0);
refs--;
return refs == 0;
}
// Return true if there are external refs, false otherwise.
bool HasRefs() const { return refs > 0; }
bool InCache() const { return flags & IN_CACHE; }
void SetInCache(bool in_cache) {
if (in_cache) {
flags |= IN_CACHE;
} else {
flags &= ~IN_CACHE;
}
}
void Free() {
assert(refs == 0);
if (deleter) {
(*deleter)(key(), value);
}
delete[] reinterpret_cast<char*>(this);
}
// Calculate the memory usage by metadata.
inline size_t CalcTotalCharge(
CacheMetadataChargePolicy metadata_charge_policy) {
size_t meta_charge = 0;
if (metadata_charge_policy == kFullChargeCacheMetadata) {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
meta_charge += malloc_usable_size(static_cast<void*>(this));
#else
// This is the size that is used when a new handle is created.
meta_charge += sizeof(LRUHandle) - 1 + key_length;
#endif
}
return charge + meta_charge;
}
};
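// Editorial worked example for CalcTotalCharge's fallback branch above:
// on a typical 64-bit build where sizeof(LRUHandle) == 72 (an assumption,
// it varies by platform), an entry with charge == 4096 and a 16-byte key
// is accounted under kFullChargeCacheMetadata as
//   4096 + (72 - 1 + 16) = 4183 bytes
// when malloc_usable_size is unavailable.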
// We provide our own simple hash table since it removes a whole bunch
// of porting hacks and is also faster than some of the built-in hash
// table implementations in some of the compiler/runtime combinations
// we have tested. E.g., readrandom speeds up by ~5% over the g++
// 4.4.3's builtin hashtable.
class LRUHandleTable {
public:
// If the table uses more hash bits than `max_upper_hash_bits`,
// it will eat into the bits used for sharding, which are constant
// for a given LRUHandleTable.
explicit LRUHandleTable(int max_upper_hash_bits);
~LRUHandleTable();
LRUHandle* Lookup(const Slice& key, uint32_t hash);
LRUHandle* Insert(LRUHandle* h);
LRUHandle* Remove(const Slice& key, uint32_t hash);
template <typename T>
void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) {
for (uint32_t i = index_begin; i < index_end; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
auto n = h->next_hash;
assert(h->InCache());
func(h);
h = n;
}
}
}
int GetLengthBits() const { return length_bits_; }
private:
// Return a pointer to slot that points to a cache entry that
// matches key/hash. If there is no such cache entry, return a
// pointer to the trailing slot in the corresponding linked list.
LRUHandle** FindPointer(const Slice& key, uint32_t hash);
void Resize();
// Number of hash bits (upper because lower bits used for sharding)
// used for table index. Length == 1 << length_bits_
int length_bits_;
// The table consists of an array of buckets where each bucket is
// a linked list of cache entries that hash into the bucket.
std::unique_ptr<LRUHandle*[]> list_;
// Number of elements currently in the table.
uint32_t elems_;
// Set from max_upper_hash_bits (see constructor).
const int max_length_bits_;
};
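// Editorial note: FindPointer() and Resize() (see the .cc file above)
// index buckets with the *upper* length_bits_ bits of the 32-bit hash:
//   bucket = hash >> (32 - length_bits_);
// The low bits select the shard, so growing past max_length_bits_ would
// eat into the sharding bits. Worked example with length_bits_ == 4:
// hash 0xABCD1234 >> 28 == 0xA, i.e. bucket 10 of 16.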
// A single shard of sharded cache.
class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
public:
LRUCacheShard(size_t capacity, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy,
int max_upper_hash_bits);
~LRUCacheShard() override = default;
// Separate from constructor so caller can easily make an array of LRUCache.
// If current usage is more than new capacity, the function will attempt to
// free the needed space.
void SetCapacity(size_t capacity) override;
// Set the flag to reject insertion if cache is full.
void SetStrictCapacityLimit(bool strict_capacity_limit) override;
// Like Cache methods, but with an extra "hash" parameter.
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
Cache::DeleterFn deleter, Cache::Handle** handle,
Cache::Priority priority) override;
Status Insert(const Slice& key, uint32_t hash, void* value,
const Cache::CacheItemHelper* helper, size_t charge,
Cache::Handle** handle, Cache::Priority priority) override {
return Insert(key, hash, value, charge, helper->del_cb, handle, priority);
}
Cache::Handle* Lookup(const Slice& key, uint32_t hash,
const Cache::CacheItemHelper* /*helper*/,
const Cache::CreateCallback& /*create_cb*/,
Cache::Priority /*priority*/, bool /*wait*/,
Statistics* /*stats*/) override {
return Lookup(key, hash);
}
Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
bool Release(Cache::Handle* handle, bool /*useful*/,
bool erase_if_last_ref) override {
return Release(handle, erase_if_last_ref);
}
bool IsReady(Cache::Handle* /*handle*/) override { return true; }
void Wait(Cache::Handle* /*handle*/) override {}
bool Ref(Cache::Handle* handle) override;
bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
void Erase(const Slice& key, uint32_t hash) override;
size_t GetUsage() const override;
size_t GetPinnedUsage() const override;
void ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) override;
void EraseUnRefEntries() override;
std::string GetPrintableOptions() const override;
private:
friend class LRUCache;
// Insert an item into the hash table and, if handle is null, insert into
// the LRU list. Older items are evicted as necessary. If the cache is full
// and free_handle_on_fail is true, the item is deleted and handle is set to
// nullptr.
Status InsertItem(LRUHandle* item, Cache::Handle** handle,
bool free_handle_on_fail);
void LRU_Remove(LRUHandle* e);
void LRU_Insert(LRUHandle* e);
// Free some space following strict LRU policy until enough space
// to hold (usage_ + charge) is freed or the lru list is empty
// This function is not thread safe - it needs to be executed while
// holding the mutex_.
void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
// Initialized before use.
size_t capacity_;
// Whether to reject insertion if cache reaches its full capacity.
bool strict_capacity_limit_;
// Dummy head of LRU list.
// lru.prev is newest entry, lru.next is oldest entry.
// LRU contains items which can be evicted, i.e. referenced only by the cache
LRUHandle lru_;
// Pointer to head of low-pri pool in LRU list.
LRUHandle* lru_low_pri_;
// ------------^^^^^^^^^^^^^-----------
// Not frequently modified data members
// ------------------------------------
//
// We separate data members that are updated frequently from the ones that
// are not frequently updated, so that they don't share the same cache line,
// which would lead to false sharing.
//
// ------------------------------------
// Frequently modified data members
// ------------vvvvvvvvvvvvv-----------
LRUHandleTable table_;
// Memory size for entries residing in the cache.
size_t usage_;
// Memory size for entries residing only in the LRU list.
size_t lru_usage_;
// mutex_ protects the following state.
// We don't count mutex_ as the cache's internal state so semantically we
// don't mind mutex_ invoking the non-const actions.
mutable port::Mutex mutex_;
};
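// Editorial sketch: ALIGN_AS(CACHE_LINE_SIZE) plus the hot/cold member
// split documented above keeps the frequently written fields (table_,
// usage_, lru_usage_, mutex_) off the cache lines of the rarely written
// ones, so adjacent shards don't false-share. The bare idiom, assuming a
// 64-byte cache line (hypothetical type, not from the source):
struct alignas(64) PaddedCounterSketch {
  uint64_t value = 0;  // hot field; owns its own cache line
};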
class LRUCache
#ifdef NDEBUG
final
#endif
: public ShardedCache {
public:
LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata);
~LRUCache() override;
const char* Name() const override { return "LRUCache"; }
CacheShard* GetShard(uint32_t shard) override;
const CacheShard* GetShard(uint32_t shard) const override;
void* Value(Handle* handle) override;
size_t GetCharge(Handle* handle) const override;
uint32_t GetHash(Handle* handle) const override;
DeleterFn GetDeleter(Handle* handle) const override;
void DisownData() override;
private:
LRUCacheShard* shards_ = nullptr;
int num_shards_ = 0;
};
} // namespace fast_lru_cache
std::shared_ptr<Cache> NewFastLRUCache(
size_t capacity, int num_shard_bits = -1,
bool strict_capacity_limit = false,
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy);
} // namespace ROCKSDB_NAMESPACE

cache/lru_cache.cc vendored (555 lines changed)

@ -9,32 +9,26 @@
#include "cache/lru_cache.h"
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "port/lang.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
namespace lru_cache {
LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
: length_bits_(/* historical starting size*/ 4),
list_(new LRUHandle* [size_t{1} << length_bits_] {}),
elems_(0),
max_length_bits_(max_upper_hash_bits) {}
LRUHandleTable::LRUHandleTable() : list_(nullptr), length_(0), elems_(0) {
Resize();
}
LRUHandleTable::~LRUHandleTable() {
ApplyToEntriesRange(
[](LRUHandle* h) {
if (!h->HasRefs()) {
h->Free();
}
},
0, uint32_t{1} << length_bits_);
ApplyToAllCacheEntries([](LRUHandle* h) {
if (!h->HasRefs()) {
h->Free();
}
});
delete[] list_;
}
LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
@ -48,7 +42,7 @@ LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
*ptr = h;
if (old == nullptr) {
++elems_;
if ((elems_ >> length_bits_) > 0) { // elems_ >= length
if (elems_ > length_) {
// Since each cache entry is fairly large, we aim for a small
// average linked list length (<= 1).
Resize();
@ -68,7 +62,7 @@ LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
}
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
LRUHandle** ptr = &list_[hash >> (32 - length_bits_)];
LRUHandle** ptr = &list_[hash & (length_ - 1)];
while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
ptr = &(*ptr)->next_hash;
}
@ -76,28 +70,19 @@ LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
}
void LRUHandleTable::Resize() {
if (length_bits_ >= max_length_bits_) {
// Due to reaching limit of hash information, if we made the table bigger,
// we would allocate more addresses but only the same number would be used.
return;
uint32_t new_length = 16;
while (new_length < elems_ * 1.5) {
new_length *= 2;
}
if (length_bits_ >= 31) {
// Avoid undefined behavior shifting uint32_t by 32.
return;
}
uint32_t old_length = uint32_t{1} << length_bits_;
int new_length_bits = length_bits_ + 1;
std::unique_ptr<LRUHandle* []> new_list {
new LRUHandle* [size_t{1} << new_length_bits] {}
};
LRUHandle** new_list = new LRUHandle*[new_length];
memset(new_list, 0, sizeof(new_list[0]) * new_length);
uint32_t count = 0;
for (uint32_t i = 0; i < old_length; i++) {
for (uint32_t i = 0; i < length_; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
LRUHandle* next = h->next_hash;
uint32_t hash = h->hash;
LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)];
LRUHandle** ptr = &new_list[hash & (new_length - 1)];
h->next_hash = *ptr;
*ptr = h;
h = next;
@ -105,27 +90,25 @@ void LRUHandleTable::Resize() {
}
}
assert(elems_ == count);
list_ = std::move(new_list);
length_bits_ = new_length_bits;
delete[] list_;
list_ = new_list;
length_ = new_length;
}
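// Editorial worked example for the added growth policy above: with
// elems_ == 24 the loop takes new_length through 16 -> 32 -> 64 (16 and
// 32 are both below 24 * 1.5 == 36), so the table is rebuilt with 64
// buckets and every entry is re-bucketed by hash & (new_length - 1). The
// removed policy instead grew one bit at a time, capped at max_length_bits_.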
LRUCacheShard::LRUCacheShard(
size_t capacity, bool strict_capacity_limit, double high_pri_pool_ratio,
bool use_adaptive_mutex, CacheMetadataChargePolicy metadata_charge_policy,
int max_upper_hash_bits,
const std::shared_ptr<SecondaryCache>& secondary_cache)
LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
double high_pri_pool_ratio,
bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy)
: capacity_(0),
high_pri_pool_usage_(0),
strict_capacity_limit_(strict_capacity_limit),
high_pri_pool_ratio_(high_pri_pool_ratio),
high_pri_pool_capacity_(0),
table_(max_upper_hash_bits),
usage_(0),
lru_usage_(0),
mutex_(use_adaptive_mutex),
secondary_cache_(secondary_cache) {
mutex_(use_adaptive_mutex) {
set_metadata_charge_policy(metadata_charge_policy);
// Make empty circular linked list.
// Make empty circular linked list
lru_.next = &lru_;
lru_.prev = &lru_;
lru_low_pri_ = &lru_;
@ -138,7 +121,7 @@ void LRUCacheShard::EraseUnRefEntries() {
MutexLock l(&mutex_);
while (lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
// LRU list contains only elements which can be evicted
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
@ -155,40 +138,19 @@ void LRUCacheShard::EraseUnRefEntries() {
}
}
void LRUCacheShard::ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) {
// The state is essentially going to be the starting hash, which works
// nicely even if we resize between calls because we use upper-most
// hash bits for table indexes.
MutexLock l(&mutex_);
uint32_t length_bits = table_.GetLengthBits();
uint32_t length = uint32_t{1} << length_bits;
void LRUCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
bool thread_safe) {
const auto applyCallback = [&]() {
table_.ApplyToAllCacheEntries(
[callback](LRUHandle* h) { callback(h->value, h->charge); });
};
assert(average_entries_per_lock > 0);
// Assuming we are called with same average_entries_per_lock repeatedly,
// this simplifies some logic (index_end will not overflow).
assert(average_entries_per_lock < length || *state == 0);
uint32_t index_begin = *state >> (32 - length_bits);
uint32_t index_end = index_begin + average_entries_per_lock;
if (index_end >= length) {
// Going to end
index_end = length;
*state = UINT32_MAX;
if (thread_safe) {
MutexLock l(&mutex_);
applyCallback();
} else {
*state = index_end << (32 - length_bits);
applyCallback();
}
table_.ApplyToEntriesRange(
[callback](LRUHandle* h) {
DeleterFn deleter = h->IsSecondaryCacheCompatible()
? h->info_.helper->del_cb
: h->info_.deleter;
callback(h->key(), h->value, h->charge, deleter);
},
index_begin, index_end);
}
void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
@ -274,7 +236,7 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
autovector<LRUHandle*>* deleted) {
while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
// LRU list contains only elements which can be evicted
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
@ -295,14 +257,8 @@ void LRUCacheShard::SetCapacity(size_t capacity) {
EvictFromLRU(0, &last_reference_list);
}
// Try to insert the evicted entries into tiered cache.
// Free the entries outside of mutex for performance reasons.
// Free the entries outside of mutex for performance reasons
for (auto entry : last_reference_list) {
if (secondary_cache_ && entry->IsSecondaryCacheCompatible() &&
!entry->IsInSecondaryCache()) {
secondary_cache_->Insert(entry->key(), entry->value, entry->info_.helper)
.PermitUncheckedError();
}
entry->Free();
}
}
@ -312,183 +268,17 @@ void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
strict_capacity_limit_ = strict_capacity_limit;
}
Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
bool free_handle_on_fail) {
Status s = Status::OK();
autovector<LRUHandle*> last_reference_list;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
{
MutexLock l(&mutex_);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty.
EvictFromLRU(total_charge, &last_reference_list);
if ((usage_ + total_charge) > capacity_ &&
(strict_capacity_limit_ || handle == nullptr)) {
e->SetInCache(false);
if (handle == nullptr) {
// Don't insert the entry but still return ok, as if the entry inserted
// into cache and get evicted immediately.
last_reference_list.push_back(e);
} else {
if (free_handle_on_fail) {
delete[] reinterpret_cast<char*>(e);
*handle = nullptr;
}
s = Status::Incomplete("Insert failed due to LRU cache being full.");
}
} else {
// Insert into the cache. Note that the cache might get larger than its
// capacity if not enough space was freed up.
LRUHandle* old = table_.Insert(e);
usage_ += total_charge;
if (old != nullptr) {
s = Status::OkOverwritten();
assert(old->InCache());
old->SetInCache(false);
if (!old->HasRefs()) {
// old is on the LRU list because it's in the cache and its reference count is 0.
LRU_Remove(old);
size_t old_total_charge =
old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
last_reference_list.push_back(old);
}
}
if (handle == nullptr) {
LRU_Insert(e);
} else {
// If caller already holds a ref, no need to take one here.
if (!e->HasRefs()) {
e->Ref();
}
*handle = reinterpret_cast<Cache::Handle*>(e);
}
}
}
// Try to insert the evicted entries into the secondary cache.
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
if (secondary_cache_ && entry->IsSecondaryCacheCompatible() &&
!entry->IsInSecondaryCache()) {
secondary_cache_->Insert(entry->key(), entry->value, entry->info_.helper)
.PermitUncheckedError();
}
entry->Free();
}
return s;
}
void LRUCacheShard::Promote(LRUHandle* e) {
SecondaryCacheResultHandle* secondary_handle = e->sec_handle;
assert(secondary_handle->IsReady());
e->SetIncomplete(false);
e->SetInCache(true);
e->value = secondary_handle->Value();
e->charge = secondary_handle->Size();
delete secondary_handle;
// This call could fail if the cache is over capacity and
// strict_capacity_limit_ is true. In such a case, we don't want
// InsertItem() to free the handle, since the item is already in memory
// and the caller will most likely just read from disk if we erase it here.
if (e->value) {
Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(e);
Status s = InsertItem(e, &handle, /*free_handle_on_fail=*/false);
if (!s.ok()) {
// Item is in memory, but not accounted against the cache capacity.
// When the handle is released, the item should get deleted.
assert(!e->InCache());
}
} else {
// Since the secondary cache lookup failed, mark the item as not in cache
// Don't charge the cache as it's only metadata that'll shortly be released
MutexLock l(&mutex_);
e->charge = 0;
e->SetInCache(false);
}
}
Cache::Handle* LRUCacheShard::Lookup(
const Slice& key, uint32_t hash,
const ShardedCache::CacheItemHelper* helper,
const ShardedCache::CreateCallback& create_cb, Cache::Priority priority,
bool wait, Statistics* stats) {
LRUHandle* e = nullptr;
{
MutexLock l(&mutex_);
e = table_.Lookup(key, hash);
if (e != nullptr) {
assert(e->InCache());
if (!e->HasRefs()) {
// The entry is in the LRU list since it's in the hash table and has no external references
LRU_Remove(e);
}
e->Ref();
e->SetHit();
}
}
// If handle table lookup failed, then allocate a handle outside the
// mutex if we're going to lookup in the secondary cache.
// Only support synchronous for now.
// TODO: Support asynchronous lookup in secondary cache
if (!e && secondary_cache_ && helper && helper->saveto_cb) {
// For objects from the secondary cache, we expect the caller to provide
// a way to create/delete the primary cache object. The only case where
// a deleter would not be required is for dummy entries inserted for
// accounting purposes, which we won't demote to the secondary cache
// anyway.
assert(create_cb && helper->del_cb);
bool is_in_sec_cache{false};
std::unique_ptr<SecondaryCacheResultHandle> secondary_handle =
secondary_cache_->Lookup(key, create_cb, wait, is_in_sec_cache);
if (secondary_handle != nullptr) {
e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
e->flags = 0;
e->SetSecondaryCacheCompatible(true);
e->info_.helper = helper;
e->key_length = key.size();
e->hash = hash;
e->refs = 0;
e->next = e->prev = nullptr;
e->SetPriority(priority);
memcpy(e->key_data, key.data(), key.size());
e->value = nullptr;
e->sec_handle = secondary_handle.release();
e->Ref();
if (wait) {
Promote(e);
e->SetIsInSecondaryCache(is_in_sec_cache);
if (!e->value) {
// The secondary cache returned a handle, but the lookup failed.
e->Unref();
e->Free();
e = nullptr;
} else {
PERF_COUNTER_ADD(secondary_cache_hit_count, 1);
RecordTick(stats, SECONDARY_CACHE_HITS);
}
} else {
// If wait is false, we always return a handle and let the caller
// release the handle after checking for success or failure.
e->SetIncomplete(true);
e->SetIsInSecondaryCache(is_in_sec_cache);
// This may be slightly inaccurate, if the lookup eventually fails.
// But the probability is very low.
PERF_COUNTER_ADD(secondary_cache_hit_count, 1);
RecordTick(stats, SECONDARY_CACHE_HITS);
}
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
MutexLock l(&mutex_);
LRUHandle* e = table_.Lookup(key, hash);
if (e != nullptr) {
assert(e->InCache());
if (!e->HasRefs()) {
// The entry is in the LRU list since it's in the hash table and has no external references
LRU_Remove(e);
}
e->Ref();
e->SetHit();
}
return reinterpret_cast<Cache::Handle*>(e);
}
@ -496,7 +286,7 @@ Cache::Handle* LRUCacheShard::Lookup(
bool LRUCacheShard::Ref(Cache::Handle* h) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
MutexLock l(&mutex_);
// To create another reference - entry must be already externally referenced.
// To create another reference - entry must be already externally referenced
assert(e->HasRefs());
e->Ref();
return true;
@ -509,7 +299,7 @@ void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
MaintainPoolSize();
}
bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
if (handle == nullptr) {
return false;
}
@ -519,32 +309,27 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
MutexLock l(&mutex_);
last_reference = e->Unref();
if (last_reference && e->InCache()) {
// The item is still in cache, and nobody else holds a reference to it.
if (usage_ > capacity_ || erase_if_last_ref) {
// The LRU list must be empty since the cache is full.
assert(lru_.next == &lru_ || erase_if_last_ref);
// Take this opportunity and remove the item.
// The item is still in cache, and nobody else holds a reference to it
if (usage_ > capacity_ || force_erase) {
// The LRU list must be empty since the cache is full
assert(lru_.next == &lru_ || force_erase);
// Take this opportunity and remove the item
table_.Remove(e->key(), e->hash);
e->SetInCache(false);
} else {
// Put the item back on the LRU list, and don't free it.
// Put the item back on the LRU list, and don't free it
LRU_Insert(e);
last_reference = false;
}
}
// If it was the last reference, and the entry is either not secondary
// cache compatible (i.e. a dummy entry for accounting), or is secondary
// cache compatible and has a non-null value, then decrement the cache
// usage. If value is null in the latter case, that means the lookup
// failed and we didn't charge the cache.
if (last_reference && (!e->IsSecondaryCacheCompatible() || e->value)) {
if (last_reference) {
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
}
}
// Free the entry here outside of mutex for performance reasons.
// Free the entry here outside of mutex for performance reasons
if (last_reference) {
e->Free();
}
@ -554,35 +339,81 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
size_t charge,
void (*deleter)(const Slice& key, void* value),
const Cache::CacheItemHelper* helper,
Cache::Handle** handle, Cache::Priority priority) {
// Allocate the memory here outside of the mutex.
// If the cache is full, we'll have to release it.
// Allocate the memory here outside of the mutex
// If the cache is full, we'll have to release it
// It shouldn't happen very often though.
LRUHandle* e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
Status s = Status::OK();
autovector<LRUHandle*> last_reference_list;
e->value = value;
e->flags = 0;
if (helper) {
e->SetSecondaryCacheCompatible(true);
e->info_.helper = helper;
} else {
#ifdef __SANITIZE_THREAD__
e->is_secondary_cache_compatible_for_tsan = false;
#endif // __SANITIZE_THREAD__
e->info_.deleter = deleter;
}
e->deleter = deleter;
e->charge = charge;
e->key_length = key.size();
e->flags = 0;
e->hash = hash;
e->refs = 0;
e->next = e->prev = nullptr;
e->SetInCache(true);
e->SetPriority(priority);
memcpy(e->key_data, key.data(), key.size());
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
return InsertItem(e, handle, /* free_handle_on_fail */ true);
{
MutexLock l(&mutex_);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty
EvictFromLRU(total_charge, &last_reference_list);
if ((usage_ + total_charge) > capacity_ &&
(strict_capacity_limit_ || handle == nullptr)) {
if (handle == nullptr) {
// Don't insert the entry but still return ok, as if the entry inserted
// into cache and get evicted immediately.
e->SetInCache(false);
last_reference_list.push_back(e);
} else {
delete[] reinterpret_cast<char*>(e);
*handle = nullptr;
s = Status::Incomplete("Insert failed due to LRU cache being full.");
}
} else {
// Insert into the cache. Note that the cache might get larger than its
// capacity if not enough space was freed up.
LRUHandle* old = table_.Insert(e);
usage_ += total_charge;
if (old != nullptr) {
s = Status::OkOverwritten();
assert(old->InCache());
old->SetInCache(false);
if (!old->HasRefs()) {
// old is on the LRU list because it's in the cache and its reference count is 0
LRU_Remove(old);
size_t old_total_charge =
old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
last_reference_list.push_back(old);
}
}
if (handle == nullptr) {
LRU_Insert(e);
} else {
e->Ref();
*handle = reinterpret_cast<Cache::Handle*>(e);
}
}
}
// Free the entries here outside of mutex for performance reasons
for (auto entry : last_reference_list) {
entry->Free();
}
return s;
}
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
@ -605,25 +436,13 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
}
}
// Free the entry here outside of mutex for performance reasons.
// last_reference will only be true if e != nullptr.
// Free the entry here outside of mutex for performance reasons
// last_reference will only be true if e != nullptr
if (last_reference) {
e->Free();
}
}
bool LRUCacheShard::IsReady(Cache::Handle* handle) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
MutexLock l(&mutex_);
bool ready = true;
if (e->IsPending()) {
assert(secondary_cache_);
assert(e->sec_handle);
ready = e->sec_handle->IsReady();
}
return ready;
}
size_t LRUCacheShard::GetUsage() const {
MutexLock l(&mutex_);
return usage_;
@ -650,8 +469,7 @@ LRUCache::LRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit, double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> allocator,
bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
const std::shared_ptr<SecondaryCache>& secondary_cache)
CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit,
std::move(allocator)) {
num_shards_ = 1 << num_shard_bits;
@ -659,12 +477,10 @@ LRUCache::LRUCache(size_t capacity, int num_shard_bits,
port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
for (int i = 0; i < num_shards_; i++) {
new (&shards_[i]) LRUCacheShard(
per_shard, strict_capacity_limit, high_pri_pool_ratio,
use_adaptive_mutex, metadata_charge_policy,
/* max_upper_hash_bits */ 32 - num_shard_bits, secondary_cache);
new (&shards_[i])
LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio,
use_adaptive_mutex, metadata_charge_policy);
}
secondary_cache_ = secondary_cache;
}
LRUCache::~LRUCache() {
@ -677,11 +493,11 @@ LRUCache::~LRUCache() {
}
}
CacheShard* LRUCache::GetShard(uint32_t shard) {
CacheShard* LRUCache::GetShard(int shard) {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
const CacheShard* LRUCache::GetShard(uint32_t shard) const {
const CacheShard* LRUCache::GetShard(int shard) const {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
@ -693,25 +509,23 @@ size_t LRUCache::GetCharge(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->charge;
}
Cache::DeleterFn LRUCache::GetDeleter(Handle* handle) const {
auto h = reinterpret_cast<const LRUHandle*>(handle);
if (h->IsSecondaryCacheCompatible()) {
return h->info_.helper->del_cb;
} else {
return h->info_.deleter;
}
}
uint32_t LRUCache::GetHash(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->hash;
}
void LRUCache::DisownData() {
// Leak data only if that won't generate an ASAN/valgrind warning.
if (!kMustFreeHeapAllocations) {
shards_ = nullptr;
num_shards_ = 0;
}
// Do not drop data if compiled with ASAN, to suppress the leak warning.
#if defined(__clang__)
#if !defined(__has_feature) || !__has_feature(address_sanitizer)
shards_ = nullptr;
num_shards_ = 0;
#endif
#else // __clang__
#ifndef __SANITIZE_ADDRESS__
shards_ = nullptr;
num_shards_ = 0;
#endif // !__SANITIZE_ADDRESS__
#endif // __clang__
}
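// Editorial sketch of the sanitizer-detection idiom used above, reduced
// to a positive feature test (assumptions: clang exposes __has_feature,
// gcc defines __SANITIZE_ADDRESS__ under -fsanitize=address):
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#define SKETCH_BUILT_WITH_ASAN 1
#endif
#elif defined(__SANITIZE_ADDRESS__)
#define SKETCH_BUILT_WITH_ASAN 1
#endif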
size_t LRUCache::TEST_GetLRUSize() {
@ -730,66 +544,12 @@ double LRUCache::GetHighPriPoolRatio() {
return result;
}
void LRUCache::WaitAll(std::vector<Handle*>& handles) {
if (secondary_cache_) {
std::vector<SecondaryCacheResultHandle*> sec_handles;
sec_handles.reserve(handles.size());
for (Handle* handle : handles) {
if (!handle) {
continue;
}
LRUHandle* lru_handle = reinterpret_cast<LRUHandle*>(handle);
if (!lru_handle->IsPending()) {
continue;
}
sec_handles.emplace_back(lru_handle->sec_handle);
}
secondary_cache_->WaitAll(sec_handles);
for (Handle* handle : handles) {
if (!handle) {
continue;
}
LRUHandle* lru_handle = reinterpret_cast<LRUHandle*>(handle);
if (!lru_handle->IsPending()) {
continue;
}
uint32_t hash = GetHash(handle);
LRUCacheShard* shard = static_cast<LRUCacheShard*>(GetShard(Shard(hash)));
shard->Promote(lru_handle);
}
}
}
} // namespace lru_cache
std::shared_ptr<Cache> NewLRUCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
const std::shared_ptr<SecondaryCache>& secondary_cache) {
if (num_shard_bits >= 20) {
return nullptr; // The cache cannot be sharded into too many fine pieces.
}
if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
// Invalid high_pri_pool_ratio
return nullptr;
}
if (num_shard_bits < 0) {
num_shard_bits = GetDefaultCacheShardBits(capacity);
}
return std::make_shared<LRUCache>(
capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
std::move(memory_allocator), use_adaptive_mutex, metadata_charge_policy,
secondary_cache);
}
std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
return NewLRUCache(
cache_opts.capacity, cache_opts.num_shard_bits,
cache_opts.strict_capacity_limit, cache_opts.high_pri_pool_ratio,
cache_opts.memory_allocator, cache_opts.use_adaptive_mutex,
cache_opts.metadata_charge_policy, cache_opts.secondary_cache);
return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
cache_opts.strict_capacity_limit,
cache_opts.high_pri_pool_ratio,
cache_opts.memory_allocator, cache_opts.use_adaptive_mutex,
cache_opts.metadata_charge_policy);
}
std::shared_ptr<Cache> NewLRUCache(
@ -797,8 +557,19 @@ std::shared_ptr<Cache> NewLRUCache(
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy) {
return NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator, use_adaptive_mutex,
metadata_charge_policy, nullptr);
if (num_shard_bits >= 20) {
return nullptr; // the cache cannot be sharded into too many fine pieces
}
if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
// invalid high_pri_pool_ratio
return nullptr;
}
if (num_shard_bits < 0) {
num_shard_bits = GetDefaultCacheShardBits(capacity);
}
return std::make_shared<LRUCache>(
capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
std::move(memory_allocator), use_adaptive_mutex, metadata_charge_policy);
}
} // namespace ROCKSDB_NAMESPACE
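A minimal sketch of driving the LRUCacheOptions overload kept above; the field values are illustrative assumptions, not values from this diff:

LRUCacheOptions opts;
opts.capacity = 128 << 20;         // 128 MiB total budget
opts.num_shard_bits = 6;           // 1 << 6 = 64 shards
opts.high_pri_pool_ratio = 0.5;    // half the capacity favors high-pri entries
std::shared_ptr<Cache> cache = NewLRUCache(opts);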

cache/lru_cache.h vendored (218 lines changed)

@ -1,4 +1,4 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
@ -8,18 +8,15 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <memory>
#include <string>
#include "cache/sharded_cache.h"
#include "port/lang.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/secondary_cache.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
namespace lru_cache {
// LRU cache implementation. This class is not thread-safe.
@ -52,18 +49,8 @@ namespace lru_cache {
struct LRUHandle {
void* value;
union Info {
Info() {}
~Info() {}
Cache::DeleterFn deleter;
const ShardedCache::CacheItemHelper* helper;
} info_;
// An entry is not added to the LRUHandleTable until the secondary cache
// lookup is complete, so it's safe to have this union.
union {
LRUHandle* next_hash;
SecondaryCacheResultHandle* sec_handle;
};
void (*deleter)(const Slice&, void* value);
LRUHandle* next_hash;
LRUHandle* next;
LRUHandle* prev;
size_t charge; // TODO(opt): Only allow uint32_t?
@ -80,26 +67,12 @@ struct LRUHandle {
IS_HIGH_PRI = (1 << 1),
// Whether this entry is in high-pri pool.
IN_HIGH_PRI_POOL = (1 << 2),
// Whether this entry has had any lookups (hits).
// Wwhether this entry has had any lookups (hits).
HAS_HIT = (1 << 3),
// Can this be inserted into the secondary cache.
IS_SECONDARY_CACHE_COMPATIBLE = (1 << 4),
// Is the handle still being read from a lower tier.
IS_PENDING = (1 << 5),
// Whether this handle is still in a lower tier
IS_IN_SECONDARY_CACHE = (1 << 6),
};
uint8_t flags;
#ifdef __SANITIZE_THREAD__
// TSAN can report a false data race on flags, where one thread is writing
// to one of the mutable bits and another thread is reading this immutable
// bit. To precisely suppress that TSAN warning, we separate out this bit
// during TSAN runs.
bool is_secondary_cache_compatible_for_tsan;
#endif // __SANITIZE_THREAD__
// Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
char key_data[1];
@ -122,15 +95,6 @@ struct LRUHandle {
bool IsHighPri() const { return flags & IS_HIGH_PRI; }
bool InHighPriPool() const { return flags & IN_HIGH_PRI_POOL; }
bool HasHit() const { return flags & HAS_HIT; }
bool IsSecondaryCacheCompatible() const {
#ifdef __SANITIZE_THREAD__
return is_secondary_cache_compatible_for_tsan;
#else
return flags & IS_SECONDARY_CACHE_COMPATIBLE;
#endif // __SANITIZE_THREAD__
}
bool IsPending() const { return flags & IS_PENDING; }
bool IsInSecondaryCache() const { return flags & IS_IN_SECONDARY_CACHE; }
void SetInCache(bool in_cache) {
if (in_cache) {
@ -158,58 +122,15 @@ struct LRUHandle {
void SetHit() { flags |= HAS_HIT; }
void SetSecondaryCacheCompatible(bool compat) {
if (compat) {
flags |= IS_SECONDARY_CACHE_COMPATIBLE;
} else {
flags &= ~IS_SECONDARY_CACHE_COMPATIBLE;
}
#ifdef __SANITIZE_THREAD__
is_secondary_cache_compatible_for_tsan = compat;
#endif // __SANITIZE_THREAD__
}
void SetIncomplete(bool incomp) {
if (incomp) {
flags |= IS_PENDING;
} else {
flags &= ~IS_PENDING;
}
}
void SetIsInSecondaryCache(bool is_in_secondary_cache) {
if (is_in_secondary_cache) {
flags |= IS_IN_SECONDARY_CACHE;
} else {
flags &= ~IS_IN_SECONDARY_CACHE;
}
}
void Free() {
assert(refs == 0);
#ifdef __SANITIZE_THREAD__
// Here we can safely assert they are the same without a data race reported
assert(((flags & IS_SECONDARY_CACHE_COMPATIBLE) != 0) ==
is_secondary_cache_compatible_for_tsan);
#endif // __SANITIZE_THREAD__
if (!IsSecondaryCacheCompatible() && info_.deleter) {
(*info_.deleter)(key(), value);
} else if (IsSecondaryCacheCompatible()) {
if (IsPending()) {
assert(sec_handle != nullptr);
SecondaryCacheResultHandle* tmp_sec_handle = sec_handle;
tmp_sec_handle->Wait();
value = tmp_sec_handle->Value();
delete tmp_sec_handle;
}
if (value) {
(*info_.helper->del_cb)(key(), value);
}
if (deleter) {
(*deleter)(key(), value);
}
delete[] reinterpret_cast<char*>(this);
}
// Calculate the memory usage by metadata.
// Caclculate the memory usage by metadata
inline size_t CalcTotalCharge(
CacheMetadataChargePolicy metadata_charge_policy) {
size_t meta_charge = 0;
@ -217,7 +138,7 @@ struct LRUHandle {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
meta_charge += malloc_usable_size(static_cast<void*>(this));
#else
// This is the size that is used when a new handle is created.
// This is the size that is used when a new handle is created
meta_charge += sizeof(LRUHandle) - 1 + key_length;
#endif
}
@ -232,10 +153,7 @@ struct LRUHandle {
// 4.4.3's builtin hashtable.
class LRUHandleTable {
public:
// If the table uses more hash bits than `max_upper_hash_bits`,
// it will eat into the bits used for sharding, which are constant
// for a given LRUHandleTable.
explicit LRUHandleTable(int max_upper_hash_bits);
LRUHandleTable();
~LRUHandleTable();
LRUHandle* Lookup(const Slice& key, uint32_t hash);
@ -243,8 +161,8 @@ class LRUHandleTable {
LRUHandle* Remove(const Slice& key, uint32_t hash);
template <typename T>
void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) {
for (uint32_t i = index_begin; i < index_end; i++) {
void ApplyToAllCacheEntries(T func) {
for (uint32_t i = 0; i < length_; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
auto n = h->next_hash;
@ -255,8 +173,6 @@ class LRUHandleTable {
}
}
int GetLengthBits() const { return length_bits_; }
private:
// Return a pointer to slot that points to a cache entry that
// matches key/hash. If there is no such cache entry, return a
@ -265,19 +181,11 @@ class LRUHandleTable {
void Resize();
// Number of hash bits (upper because lower bits used for sharding)
// used for table index. Length == 1 << length_bits_
int length_bits_;
// The table consists of an array of buckets where each bucket is
// a linked list of cache entries that hash into the bucket.
std::unique_ptr<LRUHandle*[]> list_;
// Number of elements currently in the table.
LRUHandle** list_;
uint32_t length_;
uint32_t elems_;
// Set from max_upper_hash_bits (see constructor).
const int max_length_bits_;
};
// A single shard of sharded cache.
@ -285,14 +193,12 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
public:
LRUCacheShard(size_t capacity, bool strict_capacity_limit,
double high_pri_pool_ratio, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
int max_upper_hash_bits,
const std::shared_ptr<SecondaryCache>& secondary_cache);
CacheMetadataChargePolicy metadata_charge_policy);
virtual ~LRUCacheShard() override = default;
// Separate from constructor so caller can easily make an array of LRUCache.
// If current usage is more than new capacity, the function will attempt to
// free the needed space.
// free the needed space
virtual void SetCapacity(size_t capacity) override;
// Set the flag to reject insertion if cache is full.
@ -303,37 +209,14 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
// Like Cache methods, but with an extra "hash" parameter.
virtual Status Insert(const Slice& key, uint32_t hash, void* value,
size_t charge, Cache::DeleterFn deleter,
size_t charge,
void (*deleter)(const Slice& key, void* value),
Cache::Handle** handle,
Cache::Priority priority) override {
return Insert(key, hash, value, charge, deleter, nullptr, handle, priority);
}
virtual Status Insert(const Slice& key, uint32_t hash, void* value,
const Cache::CacheItemHelper* helper, size_t charge,
Cache::Handle** handle,
Cache::Priority priority) override {
assert(helper);
return Insert(key, hash, value, charge, nullptr, helper, handle, priority);
}
// If helper is null, the values of the following arguments don't matter.
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash,
const ShardedCache::CacheItemHelper* helper,
const ShardedCache::CreateCallback& create_cb,
ShardedCache::Priority priority, bool wait,
Statistics* stats) override;
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override {
return Lookup(key, hash, nullptr, nullptr, Cache::Priority::LOW, true,
nullptr);
}
virtual bool Release(Cache::Handle* handle, bool /*useful*/,
bool erase_if_last_ref) override {
return Release(handle, erase_if_last_ref);
}
virtual bool IsReady(Cache::Handle* /*handle*/) override;
virtual void Wait(Cache::Handle* /*handle*/) override {}
Cache::Priority priority) override;
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
virtual bool Ref(Cache::Handle* handle) override;
virtual bool Release(Cache::Handle* handle,
bool erase_if_last_ref = false) override;
bool force_erase = false) override;
virtual void Erase(const Slice& key, uint32_t hash) override;
// Although in some platforms the update of size_t is atomic, to make sure
@ -343,10 +226,8 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
virtual size_t GetUsage() const override;
virtual size_t GetPinnedUsage() const override;
virtual void ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) override;
virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
bool thread_safe) override;
virtual void EraseUnRefEntries() override;
@ -354,33 +235,14 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri);
// Retrieves number of elements in LRU, for unit test purpose only.
// Not threadsafe.
// Retrieves number of elements in LRU, for unit test purpose only
// not threadsafe
size_t TEST_GetLRUSize();
// Retrieves high pri pool ratio
// Retrives high pri pool ratio
double GetHighPriPoolRatio();
private:
friend class LRUCache;
// Insert an item into the hash table and, if handle is null, insert into
// the LRU list. Older items are evicted as necessary. If the cache is full
// and free_handle_on_fail is true, the item is deleted and handle is set to
// nullptr.
Status InsertItem(LRUHandle* item, Cache::Handle** handle,
bool free_handle_on_fail);
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
DeleterFn deleter, const Cache::CacheItemHelper* helper,
Cache::Handle** handle, Cache::Priority priority);
// Promote an item looked up from the secondary cache to the LRU cache.
// The item may be still in the secondary cache.
// It is only inserted into the hash table and not the LRU list, and only
// if the cache is not at full capacity, as is the case during Insert. The
// caller should hold a reference on the LRUHandle. When the caller releases
// the last reference, the item is added to the LRU list.
// The item is promoted to the high pri or low pri pool as specified by the
// caller in Lookup.
void Promote(LRUHandle* e);
void LRU_Remove(LRUHandle* e);
void LRU_Insert(LRUHandle* e);
@ -391,7 +253,7 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
// Free some space following strict LRU policy until enough space
// to hold (usage_ + charge) is freed or the lru list is empty
// This function is not thread safe - it needs to be executed while
// holding the mutex_.
// holding the mutex_
void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
// Initialized before use.
@ -431,18 +293,16 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
// ------------vvvvvvvvvvvvv-----------
LRUHandleTable table_;
// Memory size for entries residing in the cache.
// Memory size for entries residing in the cache
size_t usage_;
// Memory size for entries residing only in the LRU list.
// Memory size for entries residing only in the LRU list
size_t lru_usage_;
// mutex_ protects the following state.
// We don't count mutex_ as the cache's internal state so semantically we
// don't mind mutex_ invoking the non-const actions.
mutable port::Mutex mutex_;
std::shared_ptr<SecondaryCache> secondary_cache_;
};
class LRUCache
@ -456,34 +316,24 @@ class LRUCache
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata,
const std::shared_ptr<SecondaryCache>& secondary_cache = nullptr);
kDontChargeCacheMetadata);
virtual ~LRUCache();
virtual const char* Name() const override { return "LRUCache"; }
virtual CacheShard* GetShard(uint32_t shard) override;
virtual const CacheShard* GetShard(uint32_t shard) const override;
virtual CacheShard* GetShard(int shard) override;
virtual const CacheShard* GetShard(int shard) const override;
virtual void* Value(Handle* handle) override;
virtual size_t GetCharge(Handle* handle) const override;
virtual uint32_t GetHash(Handle* handle) const override;
virtual DeleterFn GetDeleter(Handle* handle) const override;
virtual void DisownData() override;
virtual void WaitAll(std::vector<Handle*>& handles) override;
// Retrieves number of elements in LRU, for unit test purpose only.
// Retrieves number of elements in LRU, for unit test purpose only
size_t TEST_GetLRUSize();
// Retrieves high pri pool ratio.
// Retrives high pri pool ratio
double GetHighPriPoolRatio();
private:
LRUCacheShard* shards_ = nullptr;
int num_shards_ = 0;
std::shared_ptr<SecondaryCache> secondary_cache_;
};
} // namespace lru_cache
using LRUCache = lru_cache::LRUCache;
using LRUHandle = lru_cache::LRUHandle;
using LRUCacheShard = lru_cache::LRUCacheShard;
} // namespace ROCKSDB_NAMESPACE

cache/lru_cache_test.cc vendored (1667 lines changed; diff suppressed because it is too large)

cache/sharded_cache.cc vendored (117 lines changed)

@ -9,110 +9,61 @@
#include "cache/sharded_cache.h"
#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
#include "util/hash.h"
#include "util/math.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
namespace {
inline uint32_t HashSlice(const Slice& s) {
return Lower32of64(GetSliceNPHash64(s));
}
} // namespace
ShardedCache::ShardedCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit,
std::shared_ptr<MemoryAllocator> allocator)
: Cache(std::move(allocator)),
shard_mask_((uint32_t{1} << num_shard_bits) - 1),
num_shard_bits_(num_shard_bits),
capacity_(capacity),
strict_capacity_limit_(strict_capacity_limit),
last_id_(1) {}
void ShardedCache::SetCapacity(size_t capacity) {
uint32_t num_shards = GetNumShards();
int num_shards = 1 << num_shard_bits_;
const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
MutexLock l(&capacity_mutex_);
for (uint32_t s = 0; s < num_shards; s++) {
for (int s = 0; s < num_shards; s++) {
GetShard(s)->SetCapacity(per_shard);
}
capacity_ = capacity;
}
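// Editorial worked example for the ceiling division above: with
// capacity == 100 and 8 shards, per_shard = (100 + 7) / 8 = 13, so the
// shards can hold 8 * 13 = 104 in total, rounding the budget up rather
// than down.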
void ShardedCache::SetStrictCapacityLimit(bool strict_capacity_limit) {
uint32_t num_shards = GetNumShards();
int num_shards = 1 << num_shard_bits_;
MutexLock l(&capacity_mutex_);
for (uint32_t s = 0; s < num_shards; s++) {
for (int s = 0; s < num_shards; s++) {
GetShard(s)->SetStrictCapacityLimit(strict_capacity_limit);
}
strict_capacity_limit_ = strict_capacity_limit;
}
Status ShardedCache::Insert(const Slice& key, void* value, size_t charge,
DeleterFn deleter, Handle** handle,
Priority priority) {
void (*deleter)(const Slice& key, void* value),
Handle** handle, Priority priority) {
uint32_t hash = HashSlice(key);
return GetShard(Shard(hash))
->Insert(key, hash, value, charge, deleter, handle, priority);
}
Status ShardedCache::Insert(const Slice& key, void* value,
const CacheItemHelper* helper, size_t charge,
Handle** handle, Priority priority) {
uint32_t hash = HashSlice(key);
if (!helper) {
return Status::InvalidArgument();
}
return GetShard(Shard(hash))
->Insert(key, hash, value, helper, charge, handle, priority);
}
Cache::Handle* ShardedCache::Lookup(const Slice& key, Statistics* /*stats*/) {
uint32_t hash = HashSlice(key);
return GetShard(Shard(hash))->Lookup(key, hash);
}
Cache::Handle* ShardedCache::Lookup(const Slice& key,
const CacheItemHelper* helper,
const CreateCallback& create_cb,
Priority priority, bool wait,
Statistics* stats) {
uint32_t hash = HashSlice(key);
return GetShard(Shard(hash))
->Lookup(key, hash, helper, create_cb, priority, wait, stats);
}
bool ShardedCache::IsReady(Handle* handle) {
uint32_t hash = GetHash(handle);
return GetShard(Shard(hash))->IsReady(handle);
}
void ShardedCache::Wait(Handle* handle) {
uint32_t hash = GetHash(handle);
GetShard(Shard(hash))->Wait(handle);
}
bool ShardedCache::Ref(Handle* handle) {
uint32_t hash = GetHash(handle);
return GetShard(Shard(hash))->Ref(handle);
}
bool ShardedCache::Release(Handle* handle, bool erase_if_last_ref) {
bool ShardedCache::Release(Handle* handle, bool force_erase) {
uint32_t hash = GetHash(handle);
return GetShard(Shard(hash))->Release(handle, erase_if_last_ref);
}
bool ShardedCache::Release(Handle* handle, bool useful,
bool erase_if_last_ref) {
uint32_t hash = GetHash(handle);
return GetShard(Shard(hash))->Release(handle, useful, erase_if_last_ref);
return GetShard(Shard(hash))->Release(handle, force_erase);
}
void ShardedCache::Erase(const Slice& key) {
@ -136,9 +87,9 @@ bool ShardedCache::HasStrictCapacityLimit() const {
size_t ShardedCache::GetUsage() const {
// We will not lock the cache when getting the usage from shards.
uint32_t num_shards = GetNumShards();
int num_shards = 1 << num_shard_bits_;
size_t usage = 0;
for (uint32_t s = 0; s < num_shards; s++) {
for (int s = 0; s < num_shards; s++) {
usage += GetShard(s)->GetUsage();
}
return usage;
@ -150,42 +101,25 @@ size_t ShardedCache::GetUsage(Handle* handle) const {
size_t ShardedCache::GetPinnedUsage() const {
// We will not lock the cache when getting the usage from shards.
uint32_t num_shards = GetNumShards();
int num_shards = 1 << num_shard_bits_;
size_t usage = 0;
for (uint32_t s = 0; s < num_shards; s++) {
for (int s = 0; s < num_shards; s++) {
usage += GetShard(s)->GetPinnedUsage();
}
return usage;
}
void ShardedCache::ApplyToAllEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
const ApplyToAllEntriesOptions& opts) {
uint32_t num_shards = GetNumShards();
// Iterate over part of each shard, rotating between shards, to
// minimize impact on latency of concurrent operations.
std::unique_ptr<uint32_t[]> states(new uint32_t[num_shards]{});
uint32_t aepl_in_32 = static_cast<uint32_t>(
std::min(size_t{UINT32_MAX}, opts.average_entries_per_lock));
aepl_in_32 = std::max(aepl_in_32, uint32_t{1});  // clamp to at least 1 per lock
bool remaining_work;
do {
remaining_work = false;
for (uint32_t s = 0; s < num_shards; s++) {
if (states[s] != UINT32_MAX) {
GetShard(s)->ApplyToSomeEntries(callback, aepl_in_32, &states[s]);
remaining_work |= states[s] != UINT32_MAX;
}
}
} while (remaining_work);
void ShardedCache::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
bool thread_safe) {
int num_shards = 1 << num_shard_bits_;
for (int s = 0; s < num_shards; s++) {
GetShard(s)->ApplyToAllCacheEntries(callback, thread_safe);
}
}
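// Editorial sketch, not part of the original source: driving the removed
// ApplyToSomeEntries protocol for one shard. The caller seeds *state = 0
// and the shard reports completion by setting it to UINT32_MAX; `shard`
// and `callback` are assumed caller-side names.
namespace {
void ScanShardSketch(CacheShard* shard,
                     const std::function<void(const Slice&, void*, size_t,
                                              Cache::DeleterFn)>& callback) {
  uint32_t state = 0;
  do {
    shard->ApplyToSomeEntries(callback, /*average_entries_per_lock=*/256,
                              &state);
  } while (state != UINT32_MAX);
}
}  // namespace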
void ShardedCache::EraseUnRefEntries() {
uint32_t num_shards = GetNumShards();
for (uint32_t s = 0; s < num_shards; s++) {
int num_shards = 1 << num_shard_bits_;
for (int s = 0; s < num_shards; s++) {
GetShard(s)->EraseUnRefEntries();
}
}
@ -200,8 +134,7 @@ std::string ShardedCache::GetPrintableOptions() const {
snprintf(buffer, kBufferSize, " capacity : %" ROCKSDB_PRIszt "\n",
capacity_);
ret.append(buffer);
snprintf(buffer, kBufferSize, " num_shard_bits : %d\n",
GetNumShardBits());
snprintf(buffer, kBufferSize, " num_shard_bits : %d\n", num_shard_bits_);
ret.append(buffer);
snprintf(buffer, kBufferSize, " strict_capacity_limit : %d\n",
strict_capacity_limit_);
@ -226,8 +159,4 @@ int GetDefaultCacheShardBits(size_t capacity) {
return num_shard_bits;
}
int ShardedCache::GetNumShardBits() const { return BitsSetToOne(shard_mask_); }
uint32_t ShardedCache::GetNumShards() const { return shard_mask_ + 1; }
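// Editorial worked example: with num_shard_bits == 6 the constructor sets
// shard_mask_ = (uint32_t{1} << 6) - 1 = 63, so GetNumShards() == 64 and
// GetNumShardBits() recovers 6 via BitsSetToOne(63). A key then lands in
// shard (hash & shard_mask_), i.e. its low 6 bits (assuming Shard() masks
// with shard_mask_, consistent with the fields above).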
} // namespace ROCKSDB_NAMESPACE

cache/sharded_cache.h vendored (77 lines changed)

@ -14,6 +14,7 @@
#include "port/port.h"
#include "rocksdb/cache.h"
#include "util/hash.h"
namespace ROCKSDB_NAMESPACE {
@ -23,38 +24,20 @@ class CacheShard {
CacheShard() = default;
virtual ~CacheShard() = default;
using DeleterFn = Cache::DeleterFn;
virtual Status Insert(const Slice& key, uint32_t hash, void* value,
size_t charge, DeleterFn deleter,
Cache::Handle** handle, Cache::Priority priority) = 0;
virtual Status Insert(const Slice& key, uint32_t hash, void* value,
const Cache::CacheItemHelper* helper, size_t charge,
size_t charge,
void (*deleter)(const Slice& key, void* value),
Cache::Handle** handle, Cache::Priority priority) = 0;
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) = 0;
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash,
const Cache::CacheItemHelper* helper,
const Cache::CreateCallback& create_cb,
Cache::Priority priority, bool wait,
Statistics* stats) = 0;
virtual bool Release(Cache::Handle* handle, bool useful,
bool erase_if_last_ref) = 0;
virtual bool IsReady(Cache::Handle* handle) = 0;
virtual void Wait(Cache::Handle* handle) = 0;
virtual bool Ref(Cache::Handle* handle) = 0;
virtual bool Release(Cache::Handle* handle, bool erase_if_last_ref) = 0;
virtual bool Release(Cache::Handle* handle, bool force_erase = false) = 0;
virtual void Erase(const Slice& key, uint32_t hash) = 0;
virtual void SetCapacity(size_t capacity) = 0;
virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
virtual size_t GetUsage() const = 0;
virtual size_t GetPinnedUsage() const = 0;
// Handles iterating over roughly `average_entries_per_lock` entries, using
// `state` to record where the previous call left off. The caller initially
// passes *state == 0, and the implementation sets *state = UINT32_MAX to
// indicate completion.
virtual void ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) = 0;
virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
bool thread_safe) = 0;
virtual void EraseUnRefEntries() = 0;
virtual std::string GetPrintableOptions() const { return ""; }
void set_metadata_charge_policy(
@ -74,31 +57,24 @@ class ShardedCache : public Cache {
ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr);
virtual ~ShardedCache() = default;
virtual CacheShard* GetShard(uint32_t shard) = 0;
virtual const CacheShard* GetShard(uint32_t shard) const = 0;
virtual const char* Name() const override = 0;
virtual CacheShard* GetShard(int shard) = 0;
virtual const CacheShard* GetShard(int shard) const = 0;
virtual void* Value(Handle* handle) override = 0;
virtual size_t GetCharge(Handle* handle) const override = 0;
virtual uint32_t GetHash(Handle* handle) const = 0;
virtual void DisownData() override = 0;
virtual void SetCapacity(size_t capacity) override;
virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;
virtual Status Insert(const Slice& key, void* value, size_t charge,
DeleterFn deleter, Handle** handle,
Priority priority) override;
virtual Status Insert(const Slice& key, void* value,
const CacheItemHelper* helper, size_t charge,
Handle** handle = nullptr,
Priority priority = Priority::LOW) override;
void (*deleter)(const Slice& key, void* value),
Handle** handle, Priority priority) override;
virtual Handle* Lookup(const Slice& key, Statistics* stats) override;
virtual Handle* Lookup(const Slice& key, const CacheItemHelper* helper,
const CreateCallback& create_cb, Priority priority,
bool wait, Statistics* stats = nullptr) override;
virtual bool Release(Handle* handle, bool useful,
bool erase_if_last_ref = false) override;
virtual bool IsReady(Handle* handle) override;
virtual void Wait(Handle* handle) override;
virtual bool Ref(Handle* handle) override;
virtual bool Release(Handle* handle, bool erase_if_last_ref = false) override;
virtual bool Release(Handle* handle, bool force_erase = false) override;
virtual void Erase(const Slice& key) override;
virtual uint64_t NewId() override;
virtual size_t GetCapacity() const override;
@ -106,21 +82,24 @@ class ShardedCache : public Cache {
virtual size_t GetUsage() const override;
virtual size_t GetUsage(Handle* handle) const override;
virtual size_t GetPinnedUsage() const override;
virtual void ApplyToAllEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
const ApplyToAllEntriesOptions& opts) override;
virtual void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
bool thread_safe) override;
virtual void EraseUnRefEntries() override;
virtual std::string GetPrintableOptions() const override;
int GetNumShardBits() const;
uint32_t GetNumShards() const;
protected:
inline uint32_t Shard(uint32_t hash) { return hash & shard_mask_; }
int GetNumShardBits() const { return num_shard_bits_; }
private:
const uint32_t shard_mask_;
static inline uint32_t HashSlice(const Slice& s) {
return static_cast<uint32_t>(GetSliceNPHash64(s));
}
uint32_t Shard(uint32_t hash) {
// Note: under gcc, hash >> 32 yields hash itself (shifting a 32-bit value
// by its full width is undefined behavior), not the zero we might expect!
return (num_shard_bits_ > 0) ? (hash >> (32 - num_shard_bits_)) : 0;
}
int num_shard_bits_;
mutable port::Mutex capacity_mutex_;
size_t capacity_;
bool strict_capacity_limit_;

View File

@ -1,26 +0,0 @@
# - Find liburing
#
# uring_INCLUDE_DIR - Where to find liburing.h
# uring_LIBRARIES - List of libraries when using uring.
# uring_FOUND - True if uring found.
find_path(uring_INCLUDE_DIR
NAMES liburing.h)
find_library(uring_LIBRARIES
NAMES liburing.a liburing)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(uring
DEFAULT_MSG uring_LIBRARIES uring_INCLUDE_DIR)
mark_as_advanced(
uring_INCLUDE_DIR
uring_LIBRARIES)
if(uring_FOUND AND NOT TARGET uring::uring)
add_library(uring::uring UNKNOWN IMPORTED)
set_target_properties(uring::uring PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${uring_INCLUDE_DIR}"
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${uring_LIBRARIES}")
endif()

View File

@ -1,30 +0,0 @@
ifndef PYTHON
# Default to python3. Some distros like CentOS 8 do not have `python`.
ifeq ($(origin PYTHON), undefined)
PYTHON := $(shell which python3 || which python || echo python3)
endif
export PYTHON
endif
# To set up the tmp directory, first recognize some old variables for setting
# the test tmp directory or base tmp directory. TEST_TMPDIR is usually read
# by RocksDB tools through Env/FileSystem::GetTestDirectory.
ifeq ($(TEST_TMPDIR),)
TEST_TMPDIR := $(TMPD)
endif
ifeq ($(TEST_TMPDIR),)
ifeq ($(BASE_TMPDIR),)
BASE_TMPDIR :=$(TMPDIR)
endif
ifeq ($(BASE_TMPDIR),)
BASE_TMPDIR :=/tmp
endif
# Use /dev/shm if it has the sticky bit set (otherwise, /tmp or other
# base dir), and create a randomly-named rocksdb.XXXX directory therein.
TEST_TMPDIR := $(shell f=/dev/shm; test -k $$f || f=$(BASE_TMPDIR); \
perl -le 'use File::Temp "tempdir";' \
-e 'print tempdir("'$$f'/rocksdb.XXXX", CLEANUP => 0)')
endif
export TEST_TMPDIR

View File

@ -12,7 +12,7 @@ fi
ROOT=".."
# Fetch right version of gcov
if [ -d /mnt/gvfs/third-party -a -z "$CXX" ]; then
source $ROOT/build_tools/fbcode_config_platform009.sh
source $ROOT/build_tools/fbcode_config_platform007.sh
GCOV=$GCC_BASE/bin/gcov
else
GCOV=$(which gcov)
@ -24,7 +24,7 @@ mkdir -p $COVERAGE_DIR
# Find all gcno files to generate the coverage report
PYTHON=${1:-`which python3`}
PYTHON=${1:-`which python`}
echo -e "Using $PYTHON"
GCNO_FILES=`find $ROOT -name "*.gcno"`
$GCOV --preserve-paths --relative-only --no-output $GCNO_FILES 2>/dev/null |

View File

@ -1,93 +0,0 @@
# This file is used by Meta-internal infrastructure as well as by Makefile.
# When included from Makefile, there are rules to build DB_STRESS_CMD. When
# used directly with `make -f crash_test.mk ...` there will be no rules to
# build DB_STRESS_CMD, so it must already exist.
DB_STRESS_CMD?=./db_stress
include common.mk
CRASHTEST_MAKE=$(MAKE) -f crash_test.mk
CRASHTEST_PY=$(PYTHON) -u tools/db_crashtest.py --stress_cmd=$(DB_STRESS_CMD)
.PHONY: crash_test crash_test_with_atomic_flush crash_test_with_txn \
crash_test_with_best_efforts_recovery crash_test_with_ts \
blackbox_crash_test blackbox_crash_test_with_atomic_flush \
blackbox_crash_test_with_txn blackbox_crash_test_with_ts \
blackbox_crash_test_with_best_efforts_recovery \
whitebox_crash_test whitebox_crash_test_with_atomic_flush \
whitebox_crash_test_with_txn whitebox_crash_test_with_ts \
blackbox_crash_test_with_multiops_wc_txn \
blackbox_crash_test_with_multiops_wp_txn
crash_test: $(DB_STRESS_CMD)
# Do not parallelize
$(CRASHTEST_MAKE) whitebox_crash_test
$(CRASHTEST_MAKE) blackbox_crash_test
crash_test_with_atomic_flush: $(DB_STRESS_CMD)
# Do not parallelize
$(CRASHTEST_MAKE) whitebox_crash_test_with_atomic_flush
$(CRASHTEST_MAKE) blackbox_crash_test_with_atomic_flush
crash_test_with_txn: $(DB_STRESS_CMD)
# Do not parallelize
$(CRASHTEST_MAKE) whitebox_crash_test_with_txn
$(CRASHTEST_MAKE) blackbox_crash_test_with_txn
crash_test_with_best_efforts_recovery: blackbox_crash_test_with_best_efforts_recovery
crash_test_with_ts: $(DB_STRESS_CMD)
# Do not parallelize
$(CRASHTEST_MAKE) whitebox_crash_test_with_ts
$(CRASHTEST_MAKE) blackbox_crash_test_with_ts
crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
$(CRASHTEST_MAKE) blackbox_crash_test_with_multiops_wc_txn
crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
$(CRASHTEST_MAKE) blackbox_crash_test_with_multiops_wp_txn
blackbox_crash_test: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --simple blackbox $(CRASH_TEST_EXT_ARGS)
$(CRASHTEST_PY) blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_atomic_flush: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --cf_consistency blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --txn blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_best_efforts_recovery: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --test_best_efforts_recovery blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_ts: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --enable_ts blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --test_multiops_txn --write_policy write_committed blackbox $(CRASH_TEST_EXT_ARGS)
blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS)
ifeq ($(CRASH_TEST_KILL_ODD),)
CRASH_TEST_KILL_ODD=888887
endif
whitebox_crash_test: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --simple whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
$(CRASHTEST_PY) whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
whitebox_crash_test_with_atomic_flush: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --cf_consistency whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
whitebox_crash_test_with_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --txn whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
whitebox_crash_test_with_ts: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --enable_ts whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)

View File

@ -23,27 +23,28 @@ Status ArenaWrappedDBIter::GetProperty(std::string prop_name,
if (prop_name == "rocksdb.iterator.super-version-number") {
// First try to pass the value returned from inner iterator.
if (!db_iter_->GetProperty(prop_name, prop).ok()) {
*prop = std::to_string(sv_number_);
*prop = ToString(sv_number_);
}
return Status::OK();
}
return db_iter_->GetProperty(prop_name, prop);
}
void ArenaWrappedDBIter::Init(
Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions,
const MutableCFOptions& mutable_cf_options, const Version* version,
const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iteration,
uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl,
ColumnFamilyData* cfd, bool expose_blob_index, bool allow_refresh) {
void ArenaWrappedDBIter::Init(Env* env, const ReadOptions& read_options,
const ImmutableCFOptions& cf_options,
const MutableCFOptions& mutable_cf_options,
const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iteration,
uint64_t version_number,
ReadCallback* read_callback, DBImpl* db_impl,
ColumnFamilyData* cfd, bool allow_blob,
bool allow_refresh) {
auto mem = arena_.AllocateAligned(sizeof(DBIter));
db_iter_ =
new (mem) DBIter(env, read_options, ioptions, mutable_cf_options,
ioptions.user_comparator, /* iter */ nullptr, version,
sequence, true, max_sequential_skip_in_iteration,
read_callback, db_impl, cfd, expose_blob_index);
db_iter_ = new (mem) DBIter(env, read_options, cf_options, mutable_cf_options,
cf_options.user_comparator, nullptr, sequence,
true, max_sequential_skip_in_iteration,
read_callback, db_impl, cfd, allow_blob);
sv_number_ = version_number;
read_options_ = read_options;
allow_refresh_ = allow_refresh;
}
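Init() constructs the DBIter with placement new into arena-owned memory, which is why destruction elsewhere in this file calls db_iter_->~DBIter() explicitly rather than delete. A minimal standalone illustration of that pairing (generic example, not RocksDB code):
#include <new>
struct Widget {
  int value = 42;
  ~Widget() {}  // runs only when invoked explicitly below
};
int main() {
  // Stand-in for Arena::AllocateAligned: raw storage whose lifetime is
  // managed separately from the object constructed inside it.
  alignas(Widget) unsigned char buf[sizeof(Widget)];
  Widget* w = new (buf) Widget();  // construct in place, no heap allocation
  int v = w->value;
  w->~Widget();  // destroy in place, no deallocation
  return v == 42 ? 0 : 1;
}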
@ -55,74 +56,48 @@ Status ArenaWrappedDBIter::Refresh() {
// TODO(yiwu): For last_seq_same_as_publish_seq_==false, this is not the
// correct behavior. Will be corrected automatically when we take a snapshot
// here for the case of WritePreparedTxnDB.
SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
uint64_t cur_sv_number = cfd_->GetSuperVersionNumber();
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:1");
TEST_SYNC_POINT("ArenaWrappedDBIter::Refresh:2");
while (true) {
if (sv_number_ != cur_sv_number) {
Env* env = db_iter_->env();
db_iter_->~DBIter();
arena_.~Arena();
new (&arena_) Arena();
if (sv_number_ != cur_sv_number) {
Env* env = db_iter_->env();
db_iter_->~DBIter();
arena_.~Arena();
new (&arena_) Arena();
SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_);
SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
if (read_callback_) {
read_callback_->Refresh(latest_seq);
}
Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options,
sv->current, latest_seq,
sv->mutable_cf_options.max_sequential_skip_in_iterations,
cur_sv_number, read_callback_, db_impl_, cfd_, expose_blob_index_,
allow_refresh_);
InternalIterator* internal_iter = db_impl_->NewInternalIterator(
read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(),
latest_seq, /* allow_unprepared_value */ true);
SetIterUnderDBIter(internal_iter);
break;
} else {
SequenceNumber latest_seq = db_impl_->GetLatestSequenceNumber();
// Refresh range-tombstones in MemTable
if (!read_options_.ignore_range_deletions) {
SuperVersion* sv = cfd_->GetThreadLocalSuperVersion(db_impl_);
ReadRangeDelAggregator* range_del_agg =
db_iter_->GetRangeDelAggregator();
std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter;
range_del_iter.reset(
sv->mem->NewRangeTombstoneIterator(read_options_, latest_seq));
range_del_agg->AddTombstones(std::move(range_del_iter));
cfd_->ReturnThreadLocalSuperVersion(sv);
}
// Refresh latest sequence number
db_iter_->set_sequence(latest_seq);
db_iter_->set_valid(false);
// Check again if the latest super version number is changed
uint64_t latest_sv_number = cfd_->GetSuperVersionNumber();
if (latest_sv_number != cur_sv_number) {
// If the super version number is changed after refreshing,
// fallback to Re-Init the InternalIterator
cur_sv_number = latest_sv_number;
continue;
}
break;
SuperVersion* sv = cfd_->GetReferencedSuperVersion(db_impl_);
if (read_callback_) {
read_callback_->Refresh(latest_seq);
}
Init(env, read_options_, *(cfd_->ioptions()), sv->mutable_cf_options,
latest_seq, sv->mutable_cf_options.max_sequential_skip_in_iterations,
cur_sv_number, read_callback_, db_impl_, cfd_, allow_blob_,
allow_refresh_);
InternalIterator* internal_iter = db_impl_->NewInternalIterator(
read_options_, cfd_, sv, &arena_, db_iter_->GetRangeDelAggregator(),
latest_seq, /* allow_unprepared_value */ true);
SetIterUnderDBIter(internal_iter);
} else {
db_iter_->set_sequence(latest_seq);
db_iter_->set_valid(false);
}
return Status::OK();
}
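The loop above is a check-act-recheck pattern: take the cheap path (bump the sequence number, refresh memtable range tombstones), then verify the SuperVersion number did not change mid-flight, and fall back to a full iterator rebuild if it did. A condensed sketch of that control flow, with an atomic counter standing in for GetSuperVersionNumber() (hypothetical names, not RocksDB code):
#include <atomic>
#include <cstdint>
std::atomic<uint64_t> g_sv_number{1};  // stand-in for GetSuperVersionNumber()
// Returns true if the cheap refresh sufficed, false if a full rebuild ran.
bool Refresh(uint64_t* cached_sv) {
  uint64_t cur = g_sv_number.load();
  while (true) {
    if (*cached_sv != cur) {
      // ... rebuild iterator state against the current SuperVersion ...
      *cached_sv = cur;
      return false;
    }
    // ... cheap path: refresh sequence number / range tombstones ...
    uint64_t latest = g_sv_number.load();
    if (latest != cur) {
      cur = latest;  // changed underneath us: retry as a rebuild
      continue;
    }
    return true;
  }
}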
ArenaWrappedDBIter* NewArenaWrappedDbIterator(
Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions,
const MutableCFOptions& mutable_cf_options, const Version* version,
const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations,
uint64_t version_number, ReadCallback* read_callback, DBImpl* db_impl,
ColumnFamilyData* cfd, bool expose_blob_index, bool allow_refresh) {
Env* env, const ReadOptions& read_options,
const ImmutableCFOptions& cf_options,
const MutableCFOptions& mutable_cf_options, const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd,
bool allow_blob, bool allow_refresh) {
ArenaWrappedDBIter* iter = new ArenaWrappedDBIter();
iter->Init(env, read_options, ioptions, mutable_cf_options, version, sequence,
iter->Init(env, read_options, cf_options, mutable_cf_options, sequence,
max_sequential_skip_in_iterations, version_number, read_callback,
db_impl, cfd, expose_blob_index, allow_refresh);
db_impl, cfd, allow_blob, allow_refresh);
if (db_impl != nullptr && cfd != nullptr && allow_refresh) {
iter->StoreRefreshInfo(db_impl, cfd, read_callback, expose_blob_index);
iter->StoreRefreshInfo(read_options, db_impl, cfd, read_callback,
allow_blob);
}
return iter;

View File

@ -12,6 +12,7 @@
#include <string>
#include "db/db_impl/db_impl.h"
#include "db/db_iter.h"
#include "db/dbformat.h"
#include "db/range_del_aggregator.h"
#include "memory/arena.h"
#include "options/cf_options.h"
@ -22,7 +23,6 @@
namespace ROCKSDB_NAMESPACE {
class Arena;
class Version;
// A wrapper iterator which wraps the DB iterator and the arena in which the
// DB iterator is allocated. This class is used as an entry point of
@ -33,13 +33,7 @@ class Version;
// the same as the inner DBIter.
class ArenaWrappedDBIter : public Iterator {
public:
~ArenaWrappedDBIter() override {
if (db_iter_ != nullptr) {
db_iter_->~DBIter();
} else {
assert(false);
}
}
virtual ~ArenaWrappedDBIter() { db_iter_->~DBIter(); }
// Get the arena to be used to allocate memory for DBIter to be wrapped,
// as well as child iterators in it.
@ -47,7 +41,6 @@ class ArenaWrappedDBIter : public Iterator {
virtual ReadRangeDelAggregator* GetRangeDelAggregator() {
return db_iter_->GetRangeDelAggregator();
}
const ReadOptions& GetReadOptions() { return read_options_; }
// Set the internal iterator wrapped inside the DB Iterator. Usually it is
// a merging iterator.
@ -77,32 +70,34 @@ class ArenaWrappedDBIter : public Iterator {
Status Refresh() override;
void Init(Env* env, const ReadOptions& read_options,
const ImmutableOptions& ioptions,
const MutableCFOptions& mutable_cf_options, const Version* version,
const ImmutableCFOptions& cf_options,
const MutableCFOptions& mutable_cf_options,
const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
ReadCallback* read_callback, DBImpl* db_impl, ColumnFamilyData* cfd,
bool expose_blob_index, bool allow_refresh);
bool allow_blob, bool allow_refresh);
// Store some parameters so we can refresh the iterator at a later point
// with these same params
void StoreRefreshInfo(DBImpl* db_impl, ColumnFamilyData* cfd,
ReadCallback* read_callback, bool expose_blob_index) {
void StoreRefreshInfo(const ReadOptions& read_options, DBImpl* db_impl,
ColumnFamilyData* cfd, ReadCallback* read_callback,
bool allow_blob) {
read_options_ = read_options;
db_impl_ = db_impl;
cfd_ = cfd;
read_callback_ = read_callback;
expose_blob_index_ = expose_blob_index;
allow_blob_ = allow_blob;
}
private:
DBIter* db_iter_ = nullptr;
DBIter* db_iter_;
Arena arena_;
uint64_t sv_number_;
ColumnFamilyData* cfd_ = nullptr;
DBImpl* db_impl_ = nullptr;
ReadOptions read_options_;
ReadCallback* read_callback_;
bool expose_blob_index_ = false;
bool allow_blob_ = false;
bool allow_refresh_ = true;
};
@ -110,10 +105,11 @@ class ArenaWrappedDBIter : public Iterator {
// `db_impl` and `cfd` are used for renewal. If left null, renewal will not
// be supported.
extern ArenaWrappedDBIter* NewArenaWrappedDbIterator(
Env* env, const ReadOptions& read_options, const ImmutableOptions& ioptions,
const MutableCFOptions& mutable_cf_options, const Version* version,
const SequenceNumber& sequence, uint64_t max_sequential_skip_in_iterations,
uint64_t version_number, ReadCallback* read_callback,
DBImpl* db_impl = nullptr, ColumnFamilyData* cfd = nullptr,
bool expose_blob_index = false, bool allow_refresh = true);
Env* env, const ReadOptions& read_options,
const ImmutableCFOptions& cf_options,
const MutableCFOptions& mutable_cf_options, const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations, uint64_t version_number,
ReadCallback* read_callback, DBImpl* db_impl = nullptr,
ColumnFamilyData* cfd = nullptr, bool allow_blob = false,
bool allow_refresh = true);
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,146 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cassert>
#include "db/blob/blob_garbage_meter.h"
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/status.h"
#include "table/internal_iterator.h"
#include "test_util/sync_point.h"
namespace ROCKSDB_NAMESPACE {
// An internal iterator that passes each key-value encountered to
// BlobGarbageMeter as inflow in order to measure the total number and size of
// blobs in the compaction input on a per-blob file basis.
class BlobCountingIterator : public InternalIterator {
public:
BlobCountingIterator(InternalIterator* iter,
BlobGarbageMeter* blob_garbage_meter)
: iter_(iter), blob_garbage_meter_(blob_garbage_meter) {
assert(iter_);
assert(blob_garbage_meter_);
UpdateAndCountBlobIfNeeded();
}
bool Valid() const override { return iter_->Valid() && status_.ok(); }
void SeekToFirst() override {
iter_->SeekToFirst();
UpdateAndCountBlobIfNeeded();
}
void SeekToLast() override {
iter_->SeekToLast();
UpdateAndCountBlobIfNeeded();
}
void Seek(const Slice& target) override {
iter_->Seek(target);
UpdateAndCountBlobIfNeeded();
}
void SeekForPrev(const Slice& target) override {
iter_->SeekForPrev(target);
UpdateAndCountBlobIfNeeded();
}
void Next() override {
assert(Valid());
iter_->Next();
UpdateAndCountBlobIfNeeded();
}
bool NextAndGetResult(IterateResult* result) override {
assert(Valid());
const bool res = iter_->NextAndGetResult(result);
UpdateAndCountBlobIfNeeded();
return res;
}
void Prev() override {
assert(Valid());
iter_->Prev();
UpdateAndCountBlobIfNeeded();
}
Slice key() const override {
assert(Valid());
return iter_->key();
}
Slice user_key() const override {
assert(Valid());
return iter_->user_key();
}
Slice value() const override {
assert(Valid());
return iter_->value();
}
Status status() const override { return status_; }
bool PrepareValue() override {
assert(Valid());
return iter_->PrepareValue();
}
bool MayBeOutOfLowerBound() override {
assert(Valid());
return iter_->MayBeOutOfLowerBound();
}
IterBoundCheck UpperBoundCheckResult() override {
assert(Valid());
return iter_->UpperBoundCheckResult();
}
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
iter_->SetPinnedItersMgr(pinned_iters_mgr);
}
bool IsKeyPinned() const override {
assert(Valid());
return iter_->IsKeyPinned();
}
bool IsValuePinned() const override {
assert(Valid());
return iter_->IsValuePinned();
}
Status GetProperty(std::string prop_name, std::string* prop) override {
return iter_->GetProperty(prop_name, prop);
}
private:
void UpdateAndCountBlobIfNeeded() {
assert(!iter_->Valid() || iter_->status().ok());
if (!iter_->Valid()) {
status_ = iter_->status();
return;
}
TEST_SYNC_POINT(
"BlobCountingIterator::UpdateAndCountBlobIfNeeded:ProcessInFlow");
status_ = blob_garbage_meter_->ProcessInFlow(key(), value());
}
InternalIterator* iter_;
BlobGarbageMeter* blob_garbage_meter_;
Status status_;
};
} // namespace ROCKSDB_NAMESPACE
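The class above is a pure pass-through decorator: every positioning call forwards to the wrapped iterator and then runs UpdateAndCountBlobIfNeeded(), so each visited key-value is metered exactly once per landing. The same wrapping idea in miniature (generic sketch, not RocksDB code):
#include <cstddef>
#include <cstdint>
#include <vector>
// Decorates a cursor so that every repositioning triggers a hook, mirroring
// how BlobCountingIterator meters each landing.
class CountingCursor {
 public:
  explicit CountingCursor(const std::vector<int>& v) : v_(v) {}
  void SeekToFirst() { pos_ = 0; Hook(); }
  void Next() { ++pos_; Hook(); }
  bool Valid() const { return pos_ < v_.size(); }
  uint64_t metered() const { return metered_; }
 private:
  void Hook() {
    if (Valid()) ++metered_;  // stand-in for ProcessInFlow(key(), value())
  }
  const std::vector<int>& v_;
  std::size_t pos_ = 0;
  uint64_t metered_ = 0;
};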

View File

@ -1,326 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_counting_iterator.h"
#include <string>
#include <vector>
#include "db/blob/blob_garbage_meter.h"
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/dbformat.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/vector_iterator.h"
namespace ROCKSDB_NAMESPACE {
void CheckInFlow(const BlobGarbageMeter& blob_garbage_meter,
uint64_t blob_file_number, uint64_t count, uint64_t bytes) {
const auto& flows = blob_garbage_meter.flows();
const auto it = flows.find(blob_file_number);
if (it == flows.end()) {
ASSERT_EQ(count, 0);
ASSERT_EQ(bytes, 0);
return;
}
const auto& in = it->second.GetInFlow();
ASSERT_EQ(in.GetCount(), count);
ASSERT_EQ(in.GetBytes(), bytes);
}
TEST(BlobCountingIteratorTest, CountBlobs) {
// Note: the input consists of three key-values: two are blob references to
// different blob files, while the third one is a plain value.
constexpr char user_key0[] = "key0";
constexpr char user_key1[] = "key1";
constexpr char user_key2[] = "key2";
const std::vector<std::string> keys{
test::KeyStr(user_key0, 1, kTypeBlobIndex),
test::KeyStr(user_key1, 2, kTypeBlobIndex),
test::KeyStr(user_key2, 3, kTypeValue)};
constexpr uint64_t first_blob_file_number = 4;
constexpr uint64_t first_offset = 1000;
constexpr uint64_t first_size = 2000;
std::string first_blob_index;
BlobIndex::EncodeBlob(&first_blob_index, first_blob_file_number, first_offset,
first_size, kNoCompression);
constexpr uint64_t second_blob_file_number = 6;
constexpr uint64_t second_offset = 2000;
constexpr uint64_t second_size = 4000;
std::string second_blob_index;
BlobIndex::EncodeBlob(&second_blob_index, second_blob_file_number,
second_offset, second_size, kNoCompression);
const std::vector<std::string> values{first_blob_index, second_blob_index,
"raw_value"};
assert(keys.size() == values.size());
VectorIterator input(keys, values);
BlobGarbageMeter blob_garbage_meter;
BlobCountingIterator blob_counter(&input, &blob_garbage_meter);
constexpr uint64_t first_expected_bytes =
first_size +
BlobLogRecord::CalculateAdjustmentForRecordHeader(sizeof(user_key0) - 1);
constexpr uint64_t second_expected_bytes =
second_size +
BlobLogRecord::CalculateAdjustmentForRecordHeader(sizeof(user_key1) - 1);
// Call SeekToFirst and iterate forward
blob_counter.SeekToFirst();
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[0]);
ASSERT_EQ(blob_counter.user_key(), user_key0);
ASSERT_EQ(blob_counter.value(), values[0]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 1,
first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 0, 0);
blob_counter.Next();
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[1]);
ASSERT_EQ(blob_counter.user_key(), user_key1);
ASSERT_EQ(blob_counter.value(), values[1]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 1,
first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 1,
second_expected_bytes);
blob_counter.Next();
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[2]);
ASSERT_EQ(blob_counter.user_key(), user_key2);
ASSERT_EQ(blob_counter.value(), values[2]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 1,
first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 1,
second_expected_bytes);
blob_counter.Next();
ASSERT_FALSE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
CheckInFlow(blob_garbage_meter, first_blob_file_number, 1,
first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 1,
second_expected_bytes);
// Do it again using NextAndGetResult
blob_counter.SeekToFirst();
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[0]);
ASSERT_EQ(blob_counter.user_key(), user_key0);
ASSERT_EQ(blob_counter.value(), values[0]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 2,
2 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 1,
second_expected_bytes);
{
IterateResult result;
ASSERT_TRUE(blob_counter.NextAndGetResult(&result));
ASSERT_EQ(result.key, keys[1]);
ASSERT_EQ(blob_counter.user_key(), user_key1);
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[1]);
ASSERT_EQ(blob_counter.value(), values[1]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 2,
2 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 2,
2 * second_expected_bytes);
}
{
IterateResult result;
ASSERT_TRUE(blob_counter.NextAndGetResult(&result));
ASSERT_EQ(result.key, keys[2]);
ASSERT_EQ(blob_counter.user_key(), user_key2);
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[2]);
ASSERT_EQ(blob_counter.value(), values[2]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 2,
2 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 2,
2 * second_expected_bytes);
}
{
IterateResult result;
ASSERT_FALSE(blob_counter.NextAndGetResult(&result));
ASSERT_FALSE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
CheckInFlow(blob_garbage_meter, first_blob_file_number, 2,
2 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 2,
2 * second_expected_bytes);
}
// Call SeekToLast and iterate backward
blob_counter.SeekToLast();
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[2]);
ASSERT_EQ(blob_counter.user_key(), user_key2);
ASSERT_EQ(blob_counter.value(), values[2]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 2,
2 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 2,
2 * second_expected_bytes);
blob_counter.Prev();
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[1]);
ASSERT_EQ(blob_counter.user_key(), user_key1);
ASSERT_EQ(blob_counter.value(), values[1]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 2,
2 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 3,
3 * second_expected_bytes);
blob_counter.Prev();
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[0]);
ASSERT_EQ(blob_counter.user_key(), user_key0);
ASSERT_EQ(blob_counter.value(), values[0]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 3,
3 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 3,
3 * second_expected_bytes);
blob_counter.Prev();
ASSERT_FALSE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
CheckInFlow(blob_garbage_meter, first_blob_file_number, 3,
3 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 3,
3 * second_expected_bytes);
// Call Seek for all keys (plus one that's greater than all of them)
blob_counter.Seek(keys[0]);
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[0]);
ASSERT_EQ(blob_counter.user_key(), user_key0);
ASSERT_EQ(blob_counter.value(), values[0]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 4,
4 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 3,
3 * second_expected_bytes);
blob_counter.Seek(keys[1]);
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[1]);
ASSERT_EQ(blob_counter.user_key(), user_key1);
ASSERT_EQ(blob_counter.value(), values[1]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 4,
4 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 4,
4 * second_expected_bytes);
blob_counter.Seek(keys[2]);
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[2]);
ASSERT_EQ(blob_counter.user_key(), user_key2);
ASSERT_EQ(blob_counter.value(), values[2]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 4,
4 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 4,
4 * second_expected_bytes);
blob_counter.Seek("zzz");
ASSERT_FALSE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
CheckInFlow(blob_garbage_meter, first_blob_file_number, 4,
4 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 4,
4 * second_expected_bytes);
// Call SeekForPrev for all keys (plus one that's less than all of them)
blob_counter.SeekForPrev("aaa");
ASSERT_FALSE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
CheckInFlow(blob_garbage_meter, first_blob_file_number, 4,
4 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 4,
4 * second_expected_bytes);
blob_counter.SeekForPrev(keys[0]);
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[0]);
ASSERT_EQ(blob_counter.user_key(), user_key0);
ASSERT_EQ(blob_counter.value(), values[0]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 5,
5 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 4,
4 * second_expected_bytes);
blob_counter.SeekForPrev(keys[1]);
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[1]);
ASSERT_EQ(blob_counter.user_key(), user_key1);
ASSERT_EQ(blob_counter.value(), values[1]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 5,
5 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 5,
5 * second_expected_bytes);
blob_counter.SeekForPrev(keys[2]);
ASSERT_TRUE(blob_counter.Valid());
ASSERT_OK(blob_counter.status());
ASSERT_EQ(blob_counter.key(), keys[2]);
ASSERT_EQ(blob_counter.user_key(), user_key2);
ASSERT_EQ(blob_counter.value(), values[2]);
CheckInFlow(blob_garbage_meter, first_blob_file_number, 5,
5 * first_expected_bytes);
CheckInFlow(blob_garbage_meter, second_blob_file_number, 5,
5 * second_expected_bytes);
}
TEST(BlobCountingIteratorTest, CorruptBlobIndex) {
const std::vector<std::string> keys{
test::KeyStr("user_key", 1, kTypeBlobIndex)};
const std::vector<std::string> values{"i_am_not_a_blob_index"};
assert(keys.size() == values.size());
VectorIterator input(keys, values);
BlobGarbageMeter blob_garbage_meter;
BlobCountingIterator blob_counter(&input, &blob_garbage_meter);
blob_counter.SeekToFirst();
ASSERT_FALSE(blob_counter.Valid());
ASSERT_NOK(blob_counter.status());
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@ -1,34 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_fetcher.h"
#include "db/version_set.h"
namespace ROCKSDB_NAMESPACE {
Status BlobFetcher::FetchBlob(const Slice& user_key,
const Slice& blob_index_slice,
FilePrefetchBuffer* prefetch_buffer,
PinnableSlice* blob_value,
uint64_t* bytes_read) const {
assert(version_);
return version_->GetBlob(read_options_, user_key, blob_index_slice,
prefetch_buffer, blob_value, bytes_read);
}
Status BlobFetcher::FetchBlob(const Slice& user_key,
const BlobIndex& blob_index,
FilePrefetchBuffer* prefetch_buffer,
PinnableSlice* blob_value,
uint64_t* bytes_read) const {
assert(version_);
return version_->GetBlob(read_options_, user_key, blob_index, prefetch_buffer,
blob_value, bytes_read);
}
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,37 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include "rocksdb/options.h"
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
class Version;
class Slice;
class FilePrefetchBuffer;
class PinnableSlice;
class BlobIndex;
// A thin wrapper around the blob retrieval functionality of Version.
class BlobFetcher {
public:
BlobFetcher(const Version* version, const ReadOptions& read_options)
: version_(version), read_options_(read_options) {}
Status FetchBlob(const Slice& user_key, const Slice& blob_index_slice,
FilePrefetchBuffer* prefetch_buffer,
PinnableSlice* blob_value, uint64_t* bytes_read) const;
Status FetchBlob(const Slice& user_key, const BlobIndex& blob_index,
FilePrefetchBuffer* prefetch_buffer,
PinnableSlice* blob_value, uint64_t* bytes_read) const;
private:
const Version* version_;
ReadOptions read_options_;
};
} // namespace ROCKSDB_NAMESPACE

View File

@ -135,8 +135,7 @@ std::ostream& operator<<(std::ostream& os,
<< " total_blob_count: " << blob_file_addition.GetTotalBlobCount()
<< " total_blob_bytes: " << blob_file_addition.GetTotalBlobBytes()
<< " checksum_method: " << blob_file_addition.GetChecksumMethod()
<< " checksum_value: "
<< Slice(blob_file_addition.GetChecksumValue()).ToString(/* hex */ true);
<< " checksum_value: " << blob_file_addition.GetChecksumValue();
return os;
}
@ -147,8 +146,7 @@ JSONWriter& operator<<(JSONWriter& jw,
<< "TotalBlobCount" << blob_file_addition.GetTotalBlobCount()
<< "TotalBlobBytes" << blob_file_addition.GetTotalBlobBytes()
<< "ChecksumMethod" << blob_file_addition.GetChecksumMethod()
<< "ChecksumValue"
<< Slice(blob_file_addition.GetChecksumValue()).ToString(/* hex */ true);
<< "ChecksumValue" << blob_file_addition.GetChecksumValue();
return jw;
}
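The substance of both hunks above is the same: the main side prints checksum values hex-encoded via Slice::ToString(/* hex */ true), while the branch emits the raw bytes, which are generally unprintable. A minimal hex encoder showing the transformation (generic sketch, not the RocksDB implementation):
#include <string>
std::string ToHex(const std::string& bytes) {
  static const char kDigits[] = "0123456789abcdef";
  std::string out;
  out.reserve(bytes.size() * 2);
  for (unsigned char c : bytes) {
    out.push_back(kDigits[c >> 4]);
    out.push_back(kDigits[c & 0xf]);
  }
  return out;
}
// e.g. ToHex("\x3d\x87\xff\x57") == "3d87ff57" -- compare the raw and hex
// checksum constants in the blob_file_addition test changes below.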

View File

@ -46,9 +46,7 @@ TEST_F(BlobFileAdditionTest, NonEmpty) {
constexpr uint64_t total_blob_count = 2;
constexpr uint64_t total_blob_bytes = 123456;
const std::string checksum_method("SHA1");
const std::string checksum_value(
"\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52"
"\x5c\xbd");
const std::string checksum_value("bdb7f34a59dfa1592ce7f52e99f98c570c525cbd");
BlobFileAddition blob_file_addition(blob_file_number, total_blob_count,
total_blob_bytes, checksum_method,
@ -115,9 +113,7 @@ TEST_F(BlobFileAdditionTest, DecodeErrors) {
ASSERT_TRUE(std::strstr(s.getState(), "checksum value"));
}
constexpr char checksum_value[] =
"\xbd\xb7\xf3\x4a\x59\xdf\xa1\x59\x2c\xe7\xf5\x2e\x99\xf9\x8c\x57\x0c\x52"
"\x5c\xbd";
constexpr char checksum_value[] = "bdb7f34a59dfa1592ce7f52e99f98c570c525cbd";
PutLengthPrefixedSlice(&str, checksum_value);
slice = str;
@ -154,7 +150,7 @@ TEST_F(BlobFileAdditionTest, ForwardCompatibleCustomField) {
constexpr uint64_t total_blob_count = 9999;
constexpr uint64_t total_blob_bytes = 100000000;
const std::string checksum_method("CRC32");
const std::string checksum_value("\x3d\x87\xff\x57");
const std::string checksum_value("3d87ff57");
BlobFileAddition blob_file_addition(blob_file_number, total_blob_count,
total_blob_bytes, checksum_method,
@ -182,7 +178,7 @@ TEST_F(BlobFileAdditionTest, ForwardIncompatibleCustomField) {
constexpr uint64_t total_blob_count = 100;
constexpr uint64_t total_blob_bytes = 2000000;
const std::string checksum_method("CRC32B");
const std::string checksum_value("\x6d\xbd\xf2\x3a");
const std::string checksum_value("6dbdf23a");
BlobFileAddition blob_file_addition(blob_file_number, total_blob_count,
total_blob_bytes, checksum_method,

View File

@ -1,375 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_file_builder.h"
#include <cassert>
#include "db/blob/blob_file_addition.h"
#include "db/blob/blob_file_completion_callback.h"
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/blob/blob_log_writer.h"
#include "db/event_helpers.h"
#include "db/version_set.h"
#include "file/filename.h"
#include "file/read_write_util.h"
#include "file/writable_file_writer.h"
#include "logging/logging.h"
#include "options/cf_options.h"
#include "options/options_helper.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "test_util/sync_point.h"
#include "trace_replay/io_tracer.h"
#include "util/compression.h"
namespace ROCKSDB_NAMESPACE {
BlobFileBuilder::BlobFileBuilder(
VersionSet* versions, FileSystem* fs,
const ImmutableOptions* immutable_options,
const MutableCFOptions* mutable_cf_options, const FileOptions* file_options,
int job_id, uint32_t column_family_id,
const std::string& column_family_name, Env::IOPriority io_priority,
Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
BlobFileCreationReason creation_reason,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions)
: BlobFileBuilder([versions]() { return versions->NewFileNumber(); }, fs,
immutable_options, mutable_cf_options, file_options,
job_id, column_family_id, column_family_name, io_priority,
write_hint, io_tracer, blob_callback, creation_reason,
blob_file_paths, blob_file_additions) {}
BlobFileBuilder::BlobFileBuilder(
std::function<uint64_t()> file_number_generator, FileSystem* fs,
const ImmutableOptions* immutable_options,
const MutableCFOptions* mutable_cf_options, const FileOptions* file_options,
int job_id, uint32_t column_family_id,
const std::string& column_family_name, Env::IOPriority io_priority,
Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
BlobFileCreationReason creation_reason,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions)
: file_number_generator_(std::move(file_number_generator)),
fs_(fs),
immutable_options_(immutable_options),
min_blob_size_(mutable_cf_options->min_blob_size),
blob_file_size_(mutable_cf_options->blob_file_size),
blob_compression_type_(mutable_cf_options->blob_compression_type),
file_options_(file_options),
job_id_(job_id),
column_family_id_(column_family_id),
column_family_name_(column_family_name),
io_priority_(io_priority),
write_hint_(write_hint),
io_tracer_(io_tracer),
blob_callback_(blob_callback),
creation_reason_(creation_reason),
blob_file_paths_(blob_file_paths),
blob_file_additions_(blob_file_additions),
blob_count_(0),
blob_bytes_(0) {
assert(file_number_generator_);
assert(fs_);
assert(immutable_options_);
assert(file_options_);
assert(blob_file_paths_);
assert(blob_file_paths_->empty());
assert(blob_file_additions_);
assert(blob_file_additions_->empty());
}
BlobFileBuilder::~BlobFileBuilder() = default;
Status BlobFileBuilder::Add(const Slice& key, const Slice& value,
std::string* blob_index) {
assert(blob_index);
assert(blob_index->empty());
if (value.size() < min_blob_size_) {
return Status::OK();
}
{
const Status s = OpenBlobFileIfNeeded();
if (!s.ok()) {
return s;
}
}
Slice blob = value;
std::string compressed_blob;
{
const Status s = CompressBlobIfNeeded(&blob, &compressed_blob);
if (!s.ok()) {
return s;
}
}
uint64_t blob_file_number = 0;
uint64_t blob_offset = 0;
{
const Status s =
WriteBlobToFile(key, blob, &blob_file_number, &blob_offset);
if (!s.ok()) {
return s;
}
}
{
const Status s = CloseBlobFileIfNeeded();
if (!s.ok()) {
return s;
}
}
BlobIndex::EncodeBlob(blob_index, blob_file_number, blob_offset, blob.size(),
blob_compression_type_);
return Status::OK();
}
Status BlobFileBuilder::Finish() {
if (!IsBlobFileOpen()) {
return Status::OK();
}
return CloseBlobFile();
}
bool BlobFileBuilder::IsBlobFileOpen() const { return !!writer_; }
Status BlobFileBuilder::OpenBlobFileIfNeeded() {
if (IsBlobFileOpen()) {
return Status::OK();
}
assert(!blob_count_);
assert(!blob_bytes_);
assert(file_number_generator_);
const uint64_t blob_file_number = file_number_generator_();
assert(immutable_options_);
assert(!immutable_options_->cf_paths.empty());
std::string blob_file_path =
BlobFileName(immutable_options_->cf_paths.front().path, blob_file_number);
if (blob_callback_) {
blob_callback_->OnBlobFileCreationStarted(
blob_file_path, column_family_name_, job_id_, creation_reason_);
}
std::unique_ptr<FSWritableFile> file;
{
assert(file_options_);
Status s = NewWritableFile(fs_, blob_file_path, &file, *file_options_);
TEST_SYNC_POINT_CALLBACK(
"BlobFileBuilder::OpenBlobFileIfNeeded:NewWritableFile", &s);
if (!s.ok()) {
return s;
}
}
// Note: files get added to blob_file_paths_ right after the open, so they
// can be cleaned up upon failure. Contrast this with blob_file_additions_,
// which only contains successfully written files.
assert(blob_file_paths_);
blob_file_paths_->emplace_back(std::move(blob_file_path));
assert(file);
file->SetIOPriority(io_priority_);
file->SetWriteLifeTimeHint(write_hint_);
FileTypeSet tmp_set = immutable_options_->checksum_handoff_file_types;
Statistics* const statistics = immutable_options_->stats;
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(file), blob_file_paths_->back(), *file_options_,
immutable_options_->clock, io_tracer_, statistics,
immutable_options_->listeners,
immutable_options_->file_checksum_gen_factory.get(),
tmp_set.Contains(FileType::kBlobFile), false));
constexpr bool do_flush = false;
std::unique_ptr<BlobLogWriter> blob_log_writer(new BlobLogWriter(
std::move(file_writer), immutable_options_->clock, statistics,
blob_file_number, immutable_options_->use_fsync, do_flush));
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
BlobLogHeader header(column_family_id_, blob_compression_type_, has_ttl,
expiration_range);
{
Status s = blob_log_writer->WriteHeader(header);
TEST_SYNC_POINT_CALLBACK(
"BlobFileBuilder::OpenBlobFileIfNeeded:WriteHeader", &s);
if (!s.ok()) {
return s;
}
}
writer_ = std::move(blob_log_writer);
assert(IsBlobFileOpen());
return Status::OK();
}
Status BlobFileBuilder::CompressBlobIfNeeded(
Slice* blob, std::string* compressed_blob) const {
assert(blob);
assert(compressed_blob);
assert(compressed_blob->empty());
assert(immutable_options_);
if (blob_compression_type_ == kNoCompression) {
return Status::OK();
}
CompressionOptions opts;
CompressionContext context(blob_compression_type_);
constexpr uint64_t sample_for_compression = 0;
CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(),
blob_compression_type_, sample_for_compression);
constexpr uint32_t compression_format_version = 2;
bool success = false;
{
StopWatch stop_watch(immutable_options_->clock, immutable_options_->stats,
BLOB_DB_COMPRESSION_MICROS);
success =
CompressData(*blob, info, compression_format_version, compressed_blob);
}
if (!success) {
return Status::Corruption("Error compressing blob");
}
*blob = Slice(*compressed_blob);
return Status::OK();
}
Status BlobFileBuilder::WriteBlobToFile(const Slice& key, const Slice& blob,
uint64_t* blob_file_number,
uint64_t* blob_offset) {
assert(IsBlobFileOpen());
assert(blob_file_number);
assert(blob_offset);
uint64_t key_offset = 0;
Status s = writer_->AddRecord(key, blob, &key_offset, blob_offset);
TEST_SYNC_POINT_CALLBACK("BlobFileBuilder::WriteBlobToFile:AddRecord", &s);
if (!s.ok()) {
return s;
}
*blob_file_number = writer_->get_log_number();
++blob_count_;
blob_bytes_ += BlobLogRecord::kHeaderSize + key.size() + blob.size();
return Status::OK();
}
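The accounting on the last line of WriteBlobToFile() is what the builder tests further down verify: total bytes are the per-record header plus key plus blob payload. A worked example using the constants from BuildAndCheckOneFile below (the concrete header size here is an assumption for illustration; the real value comes from blob_log_format.h):
#include <cstdint>
int main() {
  // Assumed stand-in for BlobLogRecord::kHeaderSize; see blob_log_format.h.
  const uint64_t kHeaderSize = 32;
  const uint64_t number_of_blobs = 10, key_size = 1, value_size = 4;
  const uint64_t total =
      number_of_blobs * (kHeaderSize + key_size + value_size);
  return total == 370 ? 0 : 1;  // 10 * (32 + 1 + 4) under these assumptions
}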
Status BlobFileBuilder::CloseBlobFile() {
assert(IsBlobFileOpen());
BlobLogFooter footer;
footer.blob_count = blob_count_;
std::string checksum_method;
std::string checksum_value;
Status s = writer_->AppendFooter(footer, &checksum_method, &checksum_value);
TEST_SYNC_POINT_CALLBACK("BlobFileBuilder::WriteBlobToFile:AppendFooter", &s);
if (!s.ok()) {
return s;
}
const uint64_t blob_file_number = writer_->get_log_number();
if (blob_callback_) {
s = blob_callback_->OnBlobFileCompleted(
blob_file_paths_->back(), column_family_name_, job_id_,
blob_file_number, creation_reason_, s, checksum_value, checksum_method,
blob_count_, blob_bytes_);
}
assert(blob_file_additions_);
blob_file_additions_->emplace_back(blob_file_number, blob_count_, blob_bytes_,
std::move(checksum_method),
std::move(checksum_value));
assert(immutable_options_);
ROCKS_LOG_INFO(immutable_options_->logger,
"[%s] [JOB %d] Generated blob file #%" PRIu64 ": %" PRIu64
" total blobs, %" PRIu64 " total bytes",
column_family_name_.c_str(), job_id_, blob_file_number,
blob_count_, blob_bytes_);
writer_.reset();
blob_count_ = 0;
blob_bytes_ = 0;
return s;
}
Status BlobFileBuilder::CloseBlobFileIfNeeded() {
assert(IsBlobFileOpen());
const WritableFileWriter* const file_writer = writer_->file();
assert(file_writer);
if (file_writer->GetFileSize() < blob_file_size_) {
return Status::OK();
}
return CloseBlobFile();
}
void BlobFileBuilder::Abandon(const Status& s) {
if (!IsBlobFileOpen()) {
return;
}
if (blob_callback_) {
// BlobFileBuilder::Abandon() is called because of an error while writing to
// blob files, so any error from the completion callback below can be ignored.
blob_callback_
->OnBlobFileCompleted(blob_file_paths_->back(), column_family_name_,
job_id_, writer_->get_log_number(),
creation_reason_, s, "", "", blob_count_,
blob_bytes_)
.PermitUncheckedError();
}
writer_.reset();
blob_count_ = 0;
blob_bytes_ = 0;
}
} // namespace ROCKSDB_NAMESPACE
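Taken as a whole, the file above defines a simple lifecycle: any number of Add() calls, then exactly one of Finish() (publish metadata) or Abandon() (drop the open file's state). A hypothetical driver making that contract explicit (written as a template so it compiles standalone; not RocksDB API beyond the methods shown above):
#include <string>
#include <utility>
#include <vector>
// Works with any builder exposing the Add/Finish/Abandon shape above.
template <typename Builder>
bool BuildBlobs(Builder& builder,
                const std::vector<std::pair<std::string, std::string>>& kvs) {
  std::string blob_index;
  for (const auto& kv : kvs) {
    blob_index.clear();
    auto s = builder.Add(kv.first, kv.second, &blob_index);
    if (!s.ok()) {
      builder.Abandon(s);  // error path: never call Finish() afterwards
      return false;
    }
    // An empty blob_index means the value stayed below min_blob_size and
    // remains inline in the SST.
  }
  return builder.Finish().ok();
}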

View File

@ -1,103 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cinttypes>
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "rocksdb/compression_type.h"
#include "rocksdb/env.h"
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/types.h"
namespace ROCKSDB_NAMESPACE {
class VersionSet;
class FileSystem;
class SystemClock;
struct ImmutableOptions;
struct MutableCFOptions;
struct FileOptions;
class BlobFileAddition;
class Status;
class Slice;
class BlobLogWriter;
class IOTracer;
class BlobFileCompletionCallback;
class BlobFileBuilder {
public:
BlobFileBuilder(VersionSet* versions, FileSystem* fs,
const ImmutableOptions* immutable_options,
const MutableCFOptions* mutable_cf_options,
const FileOptions* file_options, int job_id,
uint32_t column_family_id,
const std::string& column_family_name,
Env::IOPriority io_priority,
Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
BlobFileCreationReason creation_reason,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions);
BlobFileBuilder(std::function<uint64_t()> file_number_generator,
FileSystem* fs, const ImmutableOptions* immutable_options,
const MutableCFOptions* mutable_cf_options,
const FileOptions* file_options, int job_id,
uint32_t column_family_id,
const std::string& column_family_name,
Env::IOPriority io_priority,
Env::WriteLifeTimeHint write_hint,
const std::shared_ptr<IOTracer>& io_tracer,
BlobFileCompletionCallback* blob_callback,
BlobFileCreationReason creation_reason,
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions);
BlobFileBuilder(const BlobFileBuilder&) = delete;
BlobFileBuilder& operator=(const BlobFileBuilder&) = delete;
~BlobFileBuilder();
Status Add(const Slice& key, const Slice& value, std::string* blob_index);
Status Finish();
void Abandon(const Status& s);
private:
bool IsBlobFileOpen() const;
Status OpenBlobFileIfNeeded();
Status CompressBlobIfNeeded(Slice* blob, std::string* compressed_blob) const;
Status WriteBlobToFile(const Slice& key, const Slice& blob,
uint64_t* blob_file_number, uint64_t* blob_offset);
Status CloseBlobFile();
Status CloseBlobFileIfNeeded();
std::function<uint64_t()> file_number_generator_;
FileSystem* fs_;
const ImmutableOptions* immutable_options_;
uint64_t min_blob_size_;
uint64_t blob_file_size_;
CompressionType blob_compression_type_;
const FileOptions* file_options_;
int job_id_;
uint32_t column_family_id_;
std::string column_family_name_;
Env::IOPriority io_priority_;
Env::WriteLifeTimeHint write_hint_;
std::shared_ptr<IOTracer> io_tracer_;
BlobFileCompletionCallback* blob_callback_;
BlobFileCreationReason creation_reason_;
std::vector<std::string>* blob_file_paths_;
std::vector<BlobFileAddition>* blob_file_additions_;
std::unique_ptr<BlobLogWriter> writer_;
uint64_t blob_count_;
uint64_t blob_bytes_;
};
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,672 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_file_builder.h"
#include <cassert>
#include <cinttypes>
#include <string>
#include <utility>
#include <vector>
#include "db/blob/blob_file_addition.h"
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/blob/blob_log_sequential_reader.h"
#include "env/mock_env.h"
#include "file/filename.h"
#include "file/random_access_file_reader.h"
#include "options/cf_options.h"
#include "rocksdb/env.h"
#include "rocksdb/file_checksum.h"
#include "rocksdb/options.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "util/compression.h"
#include "utilities/fault_injection_env.h"
namespace ROCKSDB_NAMESPACE {
class TestFileNumberGenerator {
public:
uint64_t operator()() { return ++next_file_number_; }
private:
uint64_t next_file_number_ = 1;
};
class BlobFileBuilderTest : public testing::Test {
protected:
BlobFileBuilderTest() {
mock_env_.reset(MockEnv::Create(Env::Default()));
fs_ = mock_env_->GetFileSystem().get();
clock_ = mock_env_->GetSystemClock().get();
}
void VerifyBlobFile(uint64_t blob_file_number,
const std::string& blob_file_path,
uint32_t column_family_id,
CompressionType blob_compression_type,
const std::vector<std::pair<std::string, std::string>>&
expected_key_value_pairs,
const std::vector<std::string>& blob_indexes) {
assert(expected_key_value_pairs.size() == blob_indexes.size());
std::unique_ptr<FSRandomAccessFile> file;
constexpr IODebugContext* dbg = nullptr;
ASSERT_OK(
fs_->NewRandomAccessFile(blob_file_path, file_options_, &file, dbg));
std::unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(std::move(file), blob_file_path, clock_));
constexpr Statistics* statistics = nullptr;
BlobLogSequentialReader blob_log_reader(std::move(file_reader), clock_,
statistics);
BlobLogHeader header;
ASSERT_OK(blob_log_reader.ReadHeader(&header));
ASSERT_EQ(header.version, kVersion1);
ASSERT_EQ(header.column_family_id, column_family_id);
ASSERT_EQ(header.compression, blob_compression_type);
ASSERT_FALSE(header.has_ttl);
ASSERT_EQ(header.expiration_range, ExpirationRange());
for (size_t i = 0; i < expected_key_value_pairs.size(); ++i) {
BlobLogRecord record;
uint64_t blob_offset = 0;
ASSERT_OK(blob_log_reader.ReadRecord(
&record, BlobLogSequentialReader::kReadHeaderKeyBlob, &blob_offset));
// Check the contents of the blob file
const auto& expected_key_value = expected_key_value_pairs[i];
const auto& key = expected_key_value.first;
const auto& value = expected_key_value.second;
ASSERT_EQ(record.key_size, key.size());
ASSERT_EQ(record.value_size, value.size());
ASSERT_EQ(record.expiration, 0);
ASSERT_EQ(record.key, key);
ASSERT_EQ(record.value, value);
// Make sure the blob reference returned by the builder points to the
// right place
BlobIndex blob_index;
ASSERT_OK(blob_index.DecodeFrom(blob_indexes[i]));
ASSERT_FALSE(blob_index.IsInlined());
ASSERT_FALSE(blob_index.HasTTL());
ASSERT_EQ(blob_index.file_number(), blob_file_number);
ASSERT_EQ(blob_index.offset(), blob_offset);
ASSERT_EQ(blob_index.size(), value.size());
}
BlobLogFooter footer;
ASSERT_OK(blob_log_reader.ReadFooter(&footer));
ASSERT_EQ(footer.blob_count, expected_key_value_pairs.size());
ASSERT_EQ(footer.expiration_range, ExpirationRange());
}
std::unique_ptr<Env> mock_env_;
FileSystem* fs_;
SystemClock* clock_;
FileOptions file_options_;
};
TEST_F(BlobFileBuilderTest, BuildAndCheckOneFile) {
// Build a single blob file
constexpr size_t number_of_blobs = 10;
constexpr size_t key_size = 1;
constexpr size_t value_size = 4;
constexpr size_t value_offset = 1234;
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileBuilderTest_BuildAndCheckOneFile"),
0);
options.enable_blob_files = true;
options.env = mock_env_.get();
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
std::vector<BlobFileAddition> blob_file_additions;
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
std::vector<std::pair<std::string, std::string>> expected_key_value_pairs(
number_of_blobs);
std::vector<std::string> blob_indexes(number_of_blobs);
for (size_t i = 0; i < number_of_blobs; ++i) {
auto& expected_key_value = expected_key_value_pairs[i];
auto& key = expected_key_value.first;
key = std::to_string(i);
assert(key.size() == key_size);
auto& value = expected_key_value.second;
value = std::to_string(i + value_offset);
assert(value.size() == value_size);
auto& blob_index = blob_indexes[i];
ASSERT_OK(builder.Add(key, value, &blob_index));
ASSERT_FALSE(blob_index.empty());
}
ASSERT_OK(builder.Finish());
// Check the metadata generated
constexpr uint64_t blob_file_number = 2;
ASSERT_EQ(blob_file_paths.size(), 1);
const std::string& blob_file_path = blob_file_paths[0];
ASSERT_EQ(
blob_file_path,
BlobFileName(immutable_options.cf_paths.front().path, blob_file_number));
ASSERT_EQ(blob_file_additions.size(), 1);
const auto& blob_file_addition = blob_file_additions[0];
ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), blob_file_number);
ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), number_of_blobs);
ASSERT_EQ(
blob_file_addition.GetTotalBlobBytes(),
number_of_blobs * (BlobLogRecord::kHeaderSize + key_size + value_size));
// Verify the contents of the new blob file as well as the blob references
VerifyBlobFile(blob_file_number, blob_file_path, column_family_id,
kNoCompression, expected_key_value_pairs, blob_indexes);
}
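// Size arithmetic behind the GetTotalBlobBytes() check above, as a rough
// sketch (BlobLogRecord::kHeaderSize is 32 bytes at the time of writing):
//
//   per-record bytes = kHeaderSize + key_size + value_size = 32 + 1 + 4
//   total            = number_of_blobs * per-record bytes  = 10 * 37 = 370
//
// The blob file's own header and footer bytes are not part of the
// BlobFileAddition total.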
TEST_F(BlobFileBuilderTest, BuildAndCheckMultipleFiles) {
// Build multiple blob files: file size limit is set to the size of a single
// value, so each blob ends up in a file of its own
constexpr size_t number_of_blobs = 10;
constexpr size_t key_size = 1;
constexpr size_t value_size = 10;
constexpr size_t value_offset = 1234567890;
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileBuilderTest_BuildAndCheckMultipleFiles"),
0);
options.enable_blob_files = true;
options.blob_file_size = value_size;
options.env = mock_env_.get();
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
std::vector<BlobFileAddition> blob_file_additions;
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
std::vector<std::pair<std::string, std::string>> expected_key_value_pairs(
number_of_blobs);
std::vector<std::string> blob_indexes(number_of_blobs);
for (size_t i = 0; i < number_of_blobs; ++i) {
auto& expected_key_value = expected_key_value_pairs[i];
auto& key = expected_key_value.first;
key = std::to_string(i);
assert(key.size() == key_size);
auto& value = expected_key_value.second;
value = std::to_string(i + value_offset);
assert(value.size() == value_size);
auto& blob_index = blob_indexes[i];
ASSERT_OK(builder.Add(key, value, &blob_index));
ASSERT_FALSE(blob_index.empty());
}
ASSERT_OK(builder.Finish());
// Check the metadata generated
ASSERT_EQ(blob_file_paths.size(), number_of_blobs);
ASSERT_EQ(blob_file_additions.size(), number_of_blobs);
for (size_t i = 0; i < number_of_blobs; ++i) {
const uint64_t blob_file_number = i + 2;
ASSERT_EQ(blob_file_paths[i],
BlobFileName(immutable_options.cf_paths.front().path,
blob_file_number));
const auto& blob_file_addition = blob_file_additions[i];
ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), blob_file_number);
ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), 1);
ASSERT_EQ(blob_file_addition.GetTotalBlobBytes(),
BlobLogRecord::kHeaderSize + key_size + value_size);
}
// Verify the contents of the new blob files as well as the blob references
for (size_t i = 0; i < number_of_blobs; ++i) {
std::vector<std::pair<std::string, std::string>> expected_key_value_pair{
expected_key_value_pairs[i]};
std::vector<std::string> blob_index{blob_indexes[i]};
VerifyBlobFile(i + 2, blob_file_paths[i], column_family_id, kNoCompression,
expected_key_value_pair, blob_index);
}
}
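// Rollover sketch: with blob_file_size == value_size, the size limit is
// reached as soon as a single blob has been written, so each Add() closes
// the current file and the next Add() opens a fresh one. File numbers
// therefore run 2, 3, ..., 11 for the ten blobs (the generator starts at 2).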
TEST_F(BlobFileBuilderTest, InlinedValues) {
// All values are below the min_blob_size threshold; no blob files get written
constexpr size_t number_of_blobs = 10;
constexpr size_t key_size = 1;
constexpr size_t value_size = 10;
constexpr size_t value_offset = 1234567890;
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileBuilderTest_InlinedValues"),
0);
options.enable_blob_files = true;
options.min_blob_size = 1024;
options.env = mock_env_.get();
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
std::vector<BlobFileAddition> blob_file_additions;
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
for (size_t i = 0; i < number_of_blobs; ++i) {
const std::string key = std::to_string(i);
assert(key.size() == key_size);
const std::string value = std::to_string(i + value_offset);
assert(value.size() == value_size);
std::string blob_index;
ASSERT_OK(builder.Add(key, value, &blob_index));
ASSERT_TRUE(blob_index.empty());
}
ASSERT_OK(builder.Finish());
// Check the metadata generated
ASSERT_TRUE(blob_file_paths.empty());
ASSERT_TRUE(blob_file_additions.empty());
}
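// What "inlined" means here, roughly: when a value is shorter than
// min_blob_size (10 < 1024 in this test), BlobFileBuilder::Add() leaves
// blob_index empty, signaling the caller to store the value directly in the
// SST instead of writing a blob record, so no blob file is ever opened.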
TEST_F(BlobFileBuilderTest, Compression) {
// Build a blob file with a compressed blob
if (!Snappy_Supported()) {
return;
}
constexpr size_t key_size = 1;
constexpr size_t value_size = 100;
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileBuilderTest_Compression"),
0);
options.enable_blob_files = true;
options.blob_compression_type = kSnappyCompression;
options.env = mock_env_.get();
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
std::vector<BlobFileAddition> blob_file_additions;
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
const std::string key("1");
const std::string uncompressed_value(value_size, 'x');
std::string blob_index;
ASSERT_OK(builder.Add(key, uncompressed_value, &blob_index));
ASSERT_FALSE(blob_index.empty());
ASSERT_OK(builder.Finish());
// Check the metadata generated
constexpr uint64_t blob_file_number = 2;
ASSERT_EQ(blob_file_paths.size(), 1);
const std::string& blob_file_path = blob_file_paths[0];
ASSERT_EQ(
blob_file_path,
BlobFileName(immutable_options.cf_paths.front().path, blob_file_number));
ASSERT_EQ(blob_file_additions.size(), 1);
const auto& blob_file_addition = blob_file_additions[0];
ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), blob_file_number);
ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), 1);
CompressionOptions opts;
CompressionContext context(kSnappyCompression);
constexpr uint64_t sample_for_compression = 0;
CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(),
kSnappyCompression, sample_for_compression);
std::string compressed_value;
ASSERT_TRUE(Snappy_Compress(info, uncompressed_value.data(),
uncompressed_value.size(), &compressed_value));
ASSERT_EQ(blob_file_addition.GetTotalBlobBytes(),
BlobLogRecord::kHeaderSize + key_size + compressed_value.size());
// Verify the contents of the new blob file as well as the blob reference
std::vector<std::pair<std::string, std::string>> expected_key_value_pairs{
{key, compressed_value}};
std::vector<std::string> blob_indexes{blob_index};
VerifyBlobFile(blob_file_number, blob_file_path, column_family_id,
kSnappyCompression, expected_key_value_pairs, blob_indexes);
}
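// Note that the expectations above are computed against the *compressed*
// value: the builder compresses before writing, so both the on-disk record
// and the size recorded in the BlobIndex refer to compressed bytes, and the
// test reproduces them by running the same Snappy path on the raw value.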
TEST_F(BlobFileBuilderTest, CompressionError) {
// Simulate an error during compression
if (!Snappy_Supported()) {
return;
}
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileBuilderTest_CompressionError"),
0);
options.enable_blob_files = true;
options.blob_compression_type = kSnappyCompression;
options.env = mock_env_.get();
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
std::vector<BlobFileAddition> blob_file_additions;
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
SyncPoint::GetInstance()->SetCallBack("CompressData:TamperWithReturnValue",
[](void* arg) {
bool* ret = static_cast<bool*>(arg);
*ret = false;
});
SyncPoint::GetInstance()->EnableProcessing();
constexpr char key[] = "1";
constexpr char value[] = "deadbeef";
std::string blob_index;
ASSERT_TRUE(builder.Add(key, value, &blob_index).IsCorruption());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
constexpr uint64_t blob_file_number = 2;
ASSERT_EQ(blob_file_paths.size(), 1);
ASSERT_EQ(
blob_file_paths[0],
BlobFileName(immutable_options.cf_paths.front().path, blob_file_number));
ASSERT_TRUE(blob_file_additions.empty());
}
TEST_F(BlobFileBuilderTest, Checksum) {
// Build a blob file with checksum
class DummyFileChecksumGenerator : public FileChecksumGenerator {
public:
void Update(const char* /* data */, size_t /* n */) override {}
void Finalize() override {}
std::string GetChecksum() const override { return std::string("dummy"); }
const char* Name() const override { return "DummyFileChecksum"; }
};
class DummyFileChecksumGenFactory : public FileChecksumGenFactory {
public:
std::unique_ptr<FileChecksumGenerator> CreateFileChecksumGenerator(
const FileChecksumGenContext& /* context */) override {
return std::unique_ptr<FileChecksumGenerator>(
new DummyFileChecksumGenerator);
}
const char* Name() const override { return "DummyFileChecksumGenFactory"; }
};
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileBuilderTest_Checksum"),
0);
options.enable_blob_files = true;
options.file_checksum_gen_factory =
std::make_shared<DummyFileChecksumGenFactory>();
options.env = mock_env_.get();
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
std::vector<BlobFileAddition> blob_file_additions;
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
const std::string key("1");
const std::string value("deadbeef");
std::string blob_index;
ASSERT_OK(builder.Add(key, value, &blob_index));
ASSERT_FALSE(blob_index.empty());
ASSERT_OK(builder.Finish());
// Check the metadata generated
constexpr uint64_t blob_file_number = 2;
ASSERT_EQ(blob_file_paths.size(), 1);
const std::string& blob_file_path = blob_file_paths[0];
ASSERT_EQ(
blob_file_path,
BlobFileName(immutable_options.cf_paths.front().path, blob_file_number));
ASSERT_EQ(blob_file_additions.size(), 1);
const auto& blob_file_addition = blob_file_additions[0];
ASSERT_EQ(blob_file_addition.GetBlobFileNumber(), blob_file_number);
ASSERT_EQ(blob_file_addition.GetTotalBlobCount(), 1);
ASSERT_EQ(blob_file_addition.GetTotalBlobBytes(),
BlobLogRecord::kHeaderSize + key.size() + value.size());
ASSERT_EQ(blob_file_addition.GetChecksumMethod(), "DummyFileChecksum");
ASSERT_EQ(blob_file_addition.GetChecksumValue(), "dummy");
// Verify the contents of the new blob file as well as the blob reference
std::vector<std::pair<std::string, std::string>> expected_key_value_pairs{
{key, value}};
std::vector<std::string> blob_indexes{blob_index};
VerifyBlobFile(blob_file_number, blob_file_path, column_family_id,
kNoCompression, expected_key_value_pairs, blob_indexes);
}
class BlobFileBuilderIOErrorTest
: public testing::Test,
public testing::WithParamInterface<std::string> {
protected:
BlobFileBuilderIOErrorTest() : sync_point_(GetParam()) {
mock_env_.reset(MockEnv::Create(Env::Default()));
fs_ = mock_env_->GetFileSystem().get();
}
std::unique_ptr<Env> mock_env_;
FileSystem* fs_;
FileOptions file_options_;
std::string sync_point_;
};
INSTANTIATE_TEST_CASE_P(
BlobFileBuilderTest, BlobFileBuilderIOErrorTest,
::testing::ValuesIn(std::vector<std::string>{
"BlobFileBuilder::OpenBlobFileIfNeeded:NewWritableFile",
"BlobFileBuilder::OpenBlobFileIfNeeded:WriteHeader",
"BlobFileBuilder::WriteBlobToFile:AddRecord",
"BlobFileBuilder::WriteBlobToFile:AppendFooter"}));
TEST_P(BlobFileBuilderIOErrorTest, IOError) {
// Simulate an I/O error during the specified step of Add()
// Note: blob_file_size is set to value_size so that the very first blob
// already reaches the file size limit and triggers closing the file
constexpr size_t value_size = 8;
Options options;
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileBuilderIOErrorTest_IOError"),
0);
options.enable_blob_files = true;
options.blob_file_size = value_size;
options.env = mock_env_.get();
ImmutableOptions immutable_options(options);
MutableCFOptions mutable_cf_options(options);
constexpr int job_id = 1;
constexpr uint32_t column_family_id = 123;
constexpr char column_family_name[] = "foobar";
constexpr Env::IOPriority io_priority = Env::IO_HIGH;
constexpr Env::WriteLifeTimeHint write_hint = Env::WLTH_MEDIUM;
std::vector<std::string> blob_file_paths;
std::vector<BlobFileAddition> blob_file_additions;
BlobFileBuilder builder(
TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options,
&file_options_, job_id, column_family_id, column_family_name, io_priority,
write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/,
BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions);
SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* arg) {
Status* const s = static_cast<Status*>(arg);
assert(s);
(*s) = Status::IOError(sync_point_);
});
SyncPoint::GetInstance()->EnableProcessing();
constexpr char key[] = "1";
constexpr char value[] = "deadbeef";
std::string blob_index;
ASSERT_TRUE(builder.Add(key, value, &blob_index).IsIOError());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
if (sync_point_ == "BlobFileBuilder::OpenBlobFileIfNeeded:NewWritableFile") {
ASSERT_TRUE(blob_file_paths.empty());
} else {
constexpr uint64_t blob_file_number = 2;
ASSERT_EQ(blob_file_paths.size(), 1);
ASSERT_EQ(blob_file_paths[0],
BlobFileName(immutable_options.cf_paths.front().path,
blob_file_number));
}
ASSERT_TRUE(blob_file_additions.empty());
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@@ -1,102 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_file_cache.h"
#include <cassert>
#include <memory>
#include "db/blob/blob_file_reader.h"
#include "options/cf_options.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "test_util/sync_point.h"
#include "trace_replay/io_tracer.h"
#include "util/hash.h"
namespace ROCKSDB_NAMESPACE {
BlobFileCache::BlobFileCache(Cache* cache,
const ImmutableOptions* immutable_options,
const FileOptions* file_options,
uint32_t column_family_id,
HistogramImpl* blob_file_read_hist,
const std::shared_ptr<IOTracer>& io_tracer)
: cache_(cache),
mutex_(kNumberOfMutexStripes, kGetSliceNPHash64UnseededFnPtr),
immutable_options_(immutable_options),
file_options_(file_options),
column_family_id_(column_family_id),
blob_file_read_hist_(blob_file_read_hist),
io_tracer_(io_tracer) {
assert(cache_);
assert(immutable_options_);
assert(file_options_);
}
Status BlobFileCache::GetBlobFileReader(
uint64_t blob_file_number,
CacheHandleGuard<BlobFileReader>* blob_file_reader) {
assert(blob_file_reader);
assert(blob_file_reader->IsEmpty());
const Slice key = GetSlice(&blob_file_number);
assert(cache_);
Cache::Handle* handle = cache_->Lookup(key);
if (handle) {
*blob_file_reader = CacheHandleGuard<BlobFileReader>(cache_, handle);
return Status::OK();
}
TEST_SYNC_POINT("BlobFileCache::GetBlobFileReader:DoubleCheck");
// Check again while holding mutex
MutexLock lock(mutex_.get(key));
handle = cache_->Lookup(key);
if (handle) {
*blob_file_reader = CacheHandleGuard<BlobFileReader>(cache_, handle);
return Status::OK();
}
assert(immutable_options_);
Statistics* const statistics = immutable_options_->stats;
RecordTick(statistics, NO_FILE_OPENS);
std::unique_ptr<BlobFileReader> reader;
{
assert(file_options_);
const Status s = BlobFileReader::Create(
*immutable_options_, *file_options_, column_family_id_,
blob_file_read_hist_, blob_file_number, io_tracer_, &reader);
if (!s.ok()) {
RecordTick(statistics, NO_FILE_ERRORS);
return s;
}
}
{
constexpr size_t charge = 1;
const Status s = cache_->Insert(key, reader.get(), charge,
&DeleteCacheEntry<BlobFileReader>, &handle);
if (!s.ok()) {
RecordTick(statistics, NO_FILE_ERRORS);
return s;
}
}
reader.release();
*blob_file_reader = CacheHandleGuard<BlobFileReader>(cache_, handle);
return Status::OK();
}
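// The lookup sequence above is the classic double-checked locking pattern,
// sketched:
//
//   if (Handle* h = cache->Lookup(key)) return h;  // fast path, lock-free
//   MutexLock lock(stripe(key));                   // one stripe per key hash
//   if (Handle* h = cache->Lookup(key)) return h;  // another thread won
//   open reader; cache->Insert(key, reader, ...);
//
// Only the slow path pays for the mutex, and the second Lookup() guarantees
// a file is opened at most once even when many threads miss concurrently.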
} // namespace ROCKSDB_NAMESPACE

View File

@@ -1,52 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cinttypes>
#include "cache/cache_helpers.h"
#include "rocksdb/rocksdb_namespace.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
class Cache;
struct ImmutableOptions;
struct FileOptions;
class HistogramImpl;
class Status;
class BlobFileReader;
class Slice;
class IOTracer;
class BlobFileCache {
public:
BlobFileCache(Cache* cache, const ImmutableOptions* immutable_options,
const FileOptions* file_options, uint32_t column_family_id,
HistogramImpl* blob_file_read_hist,
const std::shared_ptr<IOTracer>& io_tracer);
BlobFileCache(const BlobFileCache&) = delete;
BlobFileCache& operator=(const BlobFileCache&) = delete;
Status GetBlobFileReader(uint64_t blob_file_number,
CacheHandleGuard<BlobFileReader>* blob_file_reader);
private:
Cache* cache_;
// Note: mutex_ below is used to guard against multiple threads racing to open
// the same file.
Striped<port::Mutex, Slice> mutex_;
const ImmutableOptions* immutable_options_;
const FileOptions* file_options_;
uint32_t column_family_id_;
HistogramImpl* blob_file_read_hist_;
std::shared_ptr<IOTracer> io_tracer_;
static constexpr size_t kNumberOfMutexStripes = 1 << 7;
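// 1 << 7 == 128 stripes; a key's stripe is picked by hashing it with the
// kGetSliceNPHash64UnseededFnPtr function passed to mutex_ in the .cc file,
// roughly: stripe = NPHash64(key.data(), key.size()) % kNumberOfMutexStripes.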
};
} // namespace ROCKSDB_NAMESPACE

View File

@@ -1,268 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_file_cache.h"
#include <cassert>
#include <string>
#include "db/blob/blob_log_format.h"
#include "db/blob/blob_log_writer.h"
#include "env/mock_env.h"
#include "file/filename.h"
#include "file/read_write_util.h"
#include "file/writable_file_writer.h"
#include "options/cf_options.h"
#include "rocksdb/cache.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
namespace ROCKSDB_NAMESPACE {
namespace {
// Creates a test blob file with a single blob in it.
void WriteBlobFile(uint32_t column_family_id,
const ImmutableOptions& immutable_options,
uint64_t blob_file_number) {
assert(!immutable_options.cf_paths.empty());
const std::string blob_file_path =
BlobFileName(immutable_options.cf_paths.front().path, blob_file_number);
std::unique_ptr<FSWritableFile> file;
ASSERT_OK(NewWritableFile(immutable_options.fs.get(), blob_file_path, &file,
FileOptions()));
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(file), blob_file_path, FileOptions(), immutable_options.clock));
constexpr Statistics* statistics = nullptr;
constexpr bool use_fsync = false;
constexpr bool do_flush = false;
BlobLogWriter blob_log_writer(std::move(file_writer), immutable_options.clock,
statistics, blob_file_number, use_fsync,
do_flush);
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
BlobLogHeader header(column_family_id, kNoCompression, has_ttl,
expiration_range);
ASSERT_OK(blob_log_writer.WriteHeader(header));
constexpr char key[] = "key";
constexpr char blob[] = "blob";
std::string compressed_blob;
uint64_t key_offset = 0;
uint64_t blob_offset = 0;
ASSERT_OK(blob_log_writer.AddRecord(key, blob, &key_offset, &blob_offset));
BlobLogFooter footer;
footer.blob_count = 1;
footer.expiration_range = expiration_range;
std::string checksum_method;
std::string checksum_value;
ASSERT_OK(
blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value));
}
} // anonymous namespace
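// The helper above produces the smallest interesting blob file: a header, a
// single "key"/"blob" record, and a footer with blob_count == 1. The cache
// tests below then open it through BlobFileCache, which internally calls
// BlobFileReader::Create() on a cache miss.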
class BlobFileCacheTest : public testing::Test {
protected:
BlobFileCacheTest() { mock_env_.reset(MockEnv::Create(Env::Default())); }
std::unique_ptr<Env> mock_env_;
};
TEST_F(BlobFileCacheTest, GetBlobFileReader) {
Options options;
options.env = mock_env_.get();
options.statistics = CreateDBStatistics();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileCacheTest_GetBlobFileReader"),
0);
options.enable_blob_files = true;
constexpr uint32_t column_family_id = 1;
ImmutableOptions immutable_options(options);
constexpr uint64_t blob_file_number = 123;
WriteBlobFile(column_family_id, immutable_options, blob_file_number);
constexpr size_t capacity = 10;
std::shared_ptr<Cache> backing_cache = NewLRUCache(capacity);
FileOptions file_options;
constexpr HistogramImpl* blob_file_read_hist = nullptr;
BlobFileCache blob_file_cache(backing_cache.get(), &immutable_options,
&file_options, column_family_id,
blob_file_read_hist, nullptr /*IOTracer*/);
// First try: reader should be opened and put in cache
CacheHandleGuard<BlobFileReader> first;
ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &first));
ASSERT_NE(first.GetValue(), nullptr);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
// Second try: reader should be served from cache
CacheHandleGuard<BlobFileReader> second;
ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &second));
ASSERT_NE(second.GetValue(), nullptr);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
ASSERT_EQ(first.GetValue(), second.GetValue());
}
TEST_F(BlobFileCacheTest, GetBlobFileReader_Race) {
Options options;
options.env = mock_env_.get();
options.statistics = CreateDBStatistics();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileCacheTest_GetBlobFileReader_Race"),
0);
options.enable_blob_files = true;
constexpr uint32_t column_family_id = 1;
ImmutableOptions immutable_options(options);
constexpr uint64_t blob_file_number = 123;
WriteBlobFile(column_family_id, immutable_options, blob_file_number);
constexpr size_t capacity = 10;
std::shared_ptr<Cache> backing_cache = NewLRUCache(capacity);
FileOptions file_options;
constexpr HistogramImpl* blob_file_read_hist = nullptr;
BlobFileCache blob_file_cache(backing_cache.get(), &immutable_options,
&file_options, column_family_id,
blob_file_read_hist, nullptr /*IOTracer*/);
CacheHandleGuard<BlobFileReader> first;
CacheHandleGuard<BlobFileReader> second;
SyncPoint::GetInstance()->SetCallBack(
"BlobFileCache::GetBlobFileReader:DoubleCheck", [&](void* /* arg */) {
// Disable sync point processing to prevent infinite recursion
SyncPoint::GetInstance()->DisableProcessing();
ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &second));
ASSERT_NE(second.GetValue(), nullptr);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
});
SyncPoint::GetInstance()->EnableProcessing();
ASSERT_OK(blob_file_cache.GetBlobFileReader(blob_file_number, &first));
ASSERT_NE(first.GetValue(), nullptr);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 0);
ASSERT_EQ(first.GetValue(), second.GetValue());
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
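// Rough timeline the sync point forces (T1 is the test thread, "T2" is the
// callback standing in for a second racer):
//
//   T1: Lookup(key) -> miss
//   T1: reaches "DoubleCheck" sync point, callback runs:
//         T2: full GetBlobFileReader() -> opens reader, inserts into cache
//   T1: acquires stripe mutex, Lookup(key) -> hit
//   T1: returns T2's reader (NO_FILE_OPENS stays at 1)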
TEST_F(BlobFileCacheTest, GetBlobFileReader_IOError) {
Options options;
options.env = mock_env_.get();
options.statistics = CreateDBStatistics();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileCacheTest_GetBlobFileReader_IOError"),
0);
options.enable_blob_files = true;
constexpr size_t capacity = 10;
std::shared_ptr<Cache> backing_cache = NewLRUCache(capacity);
ImmutableOptions immutable_options(options);
FileOptions file_options;
constexpr uint32_t column_family_id = 1;
constexpr HistogramImpl* blob_file_read_hist = nullptr;
BlobFileCache blob_file_cache(backing_cache.get(), &immutable_options,
&file_options, column_family_id,
blob_file_read_hist, nullptr /*IOTracer*/);
// Note: there is no blob file with the below number
constexpr uint64_t blob_file_number = 123;
CacheHandleGuard<BlobFileReader> reader;
ASSERT_TRUE(
blob_file_cache.GetBlobFileReader(blob_file_number, &reader).IsIOError());
ASSERT_EQ(reader.GetValue(), nullptr);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 1);
}
TEST_F(BlobFileCacheTest, GetBlobFileReader_CacheFull) {
Options options;
options.env = mock_env_.get();
options.statistics = CreateDBStatistics();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileCacheTest_GetBlobFileReader_CacheFull"),
0);
options.enable_blob_files = true;
constexpr uint32_t column_family_id = 1;
ImmutableOptions immutable_options(options);
constexpr uint64_t blob_file_number = 123;
WriteBlobFile(column_family_id, immutable_options, blob_file_number);
constexpr size_t capacity = 0;
constexpr int num_shard_bits = -1; // determined automatically
constexpr bool strict_capacity_limit = true;
std::shared_ptr<Cache> backing_cache =
NewLRUCache(capacity, num_shard_bits, strict_capacity_limit);
FileOptions file_options;
constexpr HistogramImpl* blob_file_read_hist = nullptr;
BlobFileCache blob_file_cache(backing_cache.get(), &immutable_options,
&file_options, column_family_id,
blob_file_read_hist, nullptr /*IOTracer*/);
// Inserting into the cache should fail since it has zero capacity and
// strict_capacity_limit is set
CacheHandleGuard<BlobFileReader> reader;
ASSERT_TRUE(blob_file_cache.GetBlobFileReader(blob_file_number, &reader)
.IsIncomplete());
ASSERT_EQ(reader.GetValue(), nullptr);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_OPENS), 1);
ASSERT_EQ(options.statistics->getTickerCount(NO_FILE_ERRORS), 1);
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@ -1,101 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include "db/error_handler.h"
#include "db/event_helpers.h"
#include "file/sst_file_manager_impl.h"
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
class BlobFileCompletionCallback {
public:
BlobFileCompletionCallback(
SstFileManager* sst_file_manager, InstrumentedMutex* mutex,
ErrorHandler* error_handler, EventLogger* event_logger,
const std::vector<std::shared_ptr<EventListener>>& listeners,
const std::string& dbname)
: event_logger_(event_logger), listeners_(listeners), dbname_(dbname) {
#ifndef ROCKSDB_LITE
sst_file_manager_ = sst_file_manager;
mutex_ = mutex;
error_handler_ = error_handler;
#else
(void)sst_file_manager;
(void)mutex;
(void)error_handler;
#endif // ROCKSDB_LITE
}
void OnBlobFileCreationStarted(const std::string& file_name,
const std::string& column_family_name,
int job_id,
BlobFileCreationReason creation_reason) {
#ifndef ROCKSDB_LITE
// Notify the listeners.
EventHelpers::NotifyBlobFileCreationStarted(listeners_, dbname_,
column_family_name, file_name,
job_id, creation_reason);
#else
(void)file_name;
(void)column_family_name;
(void)job_id;
(void)creation_reason;
#endif
}
Status OnBlobFileCompleted(const std::string& file_name,
const std::string& column_family_name, int job_id,
uint64_t file_number,
BlobFileCreationReason creation_reason,
const Status& report_status,
const std::string& checksum_value,
const std::string& checksum_method,
uint64_t blob_count, uint64_t blob_bytes) {
Status s;
#ifndef ROCKSDB_LITE
auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager_);
if (sfm) {
// Report new blob files to SstFileManagerImpl
s = sfm->OnAddFile(file_name);
if (sfm->IsMaxAllowedSpaceReached()) {
s = Status::SpaceLimit("Max allowed space was reached");
TEST_SYNC_POINT(
"BlobFileCompletionCallback::CallBack::MaxAllowedSpaceReached");
InstrumentedMutexLock l(mutex_);
error_handler_->SetBGError(s, BackgroundErrorReason::kFlush);
}
}
#endif // !ROCKSDB_LITE
// Notify the listeners.
EventHelpers::LogAndNotifyBlobFileCreationFinished(
event_logger_, listeners_, dbname_, column_family_name, file_name,
job_id, file_number, creation_reason,
(!report_status.ok() ? report_status : s),
(checksum_value.empty() ? kUnknownFileChecksum : checksum_value),
(checksum_method.empty() ? kUnknownFileChecksumFuncName
: checksum_method),
blob_count, blob_bytes);
return s;
}
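// Flow sketch for the space-limit branch above: OnAddFile() registers the
// new blob file with the SstFileManager; if tracked space then exceeds the
// configured maximum, a SpaceLimit status is raised and handed to the
// ErrorHandler as a background error (reason kFlush), putting the DB into a
// background-error state. Listeners are notified either way.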
private:
#ifndef ROCKSDB_LITE
SstFileManager* sst_file_manager_;
InstrumentedMutex* mutex_;
ErrorHandler* error_handler_;
#endif // ROCKSDB_LITE
EventLogger* event_logger_;
std::vector<std::shared_ptr<EventListener>> listeners_;
std::string dbname_;
};
} // namespace ROCKSDB_NAMESPACE

View File

@@ -8,13 +8,7 @@
#include <ostream>
#include <sstream>
#include "db/blob/blob_log_format.h"
#include "rocksdb/slice.h"
namespace ROCKSDB_NAMESPACE {
uint64_t SharedBlobFileMetaData::GetBlobFileSize() const {
return BlobLogHeader::kSize + total_blob_bytes_ + BlobLogFooter::kSize;
}
std::string SharedBlobFileMetaData::DebugString() const {
std::ostringstream oss;
@@ -29,8 +23,7 @@ std::ostream& operator<<(std::ostream& os,
<< " total_blob_count: " << shared_meta.GetTotalBlobCount()
<< " total_blob_bytes: " << shared_meta.GetTotalBlobBytes()
<< " checksum_method: " << shared_meta.GetChecksumMethod()
<< " checksum_value: "
<< Slice(shared_meta.GetChecksumValue()).ToString(/* hex */ true);
<< " checksum_value: " << shared_meta.GetChecksumValue();
return os;
}
@@ -45,15 +38,8 @@ std::string BlobFileMetaData::DebugString() const {
std::ostream& operator<<(std::ostream& os, const BlobFileMetaData& meta) {
const auto& shared_meta = meta.GetSharedMeta();
assert(shared_meta);
os << (*shared_meta);
os << " linked_ssts: {";
for (uint64_t file_number : meta.GetLinkedSsts()) {
os << ' ' << file_number;
}
os << " }";
os << " garbage_blob_count: " << meta.GetGarbageBlobCount()
os << (*shared_meta) << " garbage_blob_count: " << meta.GetGarbageBlobCount()
<< " garbage_blob_bytes: " << meta.GetGarbageBlobBytes();
return os;

View File

@@ -5,13 +5,12 @@
#pragma once
#include "rocksdb/rocksdb_namespace.h"
#include <cassert>
#include <iosfwd>
#include <memory>
#include <string>
#include <unordered_set>
#include "rocksdb/rocksdb_namespace.h"
namespace ROCKSDB_NAMESPACE {
@@ -51,7 +50,6 @@ class SharedBlobFileMetaData {
SharedBlobFileMetaData(SharedBlobFileMetaData&&) = delete;
SharedBlobFileMetaData& operator=(SharedBlobFileMetaData&&) = delete;
uint64_t GetBlobFileSize() const;
uint64_t GetBlobFileNumber() const { return blob_file_number_; }
uint64_t GetTotalBlobCount() const { return total_blob_count_; }
uint64_t GetTotalBlobBytes() const { return total_blob_bytes_; }
@@ -92,15 +90,11 @@ std::ostream& operator<<(std::ostream& os,
class BlobFileMetaData {
public:
using LinkedSsts = std::unordered_set<uint64_t>;
static std::shared_ptr<BlobFileMetaData> Create(
std::shared_ptr<SharedBlobFileMetaData> shared_meta,
LinkedSsts linked_ssts, uint64_t garbage_blob_count,
uint64_t garbage_blob_bytes) {
return std::shared_ptr<BlobFileMetaData>(
new BlobFileMetaData(std::move(shared_meta), std::move(linked_ssts),
garbage_blob_count, garbage_blob_bytes));
uint64_t garbage_blob_count, uint64_t garbage_blob_bytes) {
return std::shared_ptr<BlobFileMetaData>(new BlobFileMetaData(
std::move(shared_meta), garbage_blob_count, garbage_blob_bytes));
}
BlobFileMetaData(const BlobFileMetaData&) = delete;
@@ -113,11 +107,6 @@ class BlobFileMetaData {
return shared_meta_;
}
uint64_t GetBlobFileSize() const {
assert(shared_meta_);
return shared_meta_->GetBlobFileSize();
}
uint64_t GetBlobFileNumber() const {
assert(shared_meta_);
return shared_meta_->GetBlobFileNumber();
@@ -139,8 +128,6 @@ class BlobFileMetaData {
return shared_meta_->GetChecksumValue();
}
const LinkedSsts& GetLinkedSsts() const { return linked_ssts_; }
uint64_t GetGarbageBlobCount() const { return garbage_blob_count_; }
uint64_t GetGarbageBlobBytes() const { return garbage_blob_bytes_; }
@@ -148,10 +135,8 @@ class BlobFileMetaData {
private:
BlobFileMetaData(std::shared_ptr<SharedBlobFileMetaData> shared_meta,
LinkedSsts linked_ssts, uint64_t garbage_blob_count,
uint64_t garbage_blob_bytes)
uint64_t garbage_blob_count, uint64_t garbage_blob_bytes)
: shared_meta_(std::move(shared_meta)),
linked_ssts_(std::move(linked_ssts)),
garbage_blob_count_(garbage_blob_count),
garbage_blob_bytes_(garbage_blob_bytes) {
assert(shared_meta_);
@@ -160,7 +145,6 @@ class BlobFileMetaData {
}
std::shared_ptr<SharedBlobFileMetaData> shared_meta_;
LinkedSsts linked_ssts_;
uint64_t garbage_blob_count_;
uint64_t garbage_blob_bytes_;
};

View File

@@ -1,588 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_file_reader.h"
#include <cassert>
#include <string>
#include "db/blob/blob_log_format.h"
#include "file/file_prefetch_buffer.h"
#include "file/filename.h"
#include "monitoring/statistics.h"
#include "options/cf_options.h"
#include "rocksdb/file_system.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "test_util/sync_point.h"
#include "util/compression.h"
#include "util/crc32c.h"
#include "util/stop_watch.h"
namespace ROCKSDB_NAMESPACE {
Status BlobFileReader::Create(
const ImmutableOptions& immutable_options, const FileOptions& file_options,
uint32_t column_family_id, HistogramImpl* blob_file_read_hist,
uint64_t blob_file_number, const std::shared_ptr<IOTracer>& io_tracer,
std::unique_ptr<BlobFileReader>* blob_file_reader) {
assert(blob_file_reader);
assert(!*blob_file_reader);
uint64_t file_size = 0;
std::unique_ptr<RandomAccessFileReader> file_reader;
{
const Status s =
OpenFile(immutable_options, file_options, blob_file_read_hist,
blob_file_number, io_tracer, &file_size, &file_reader);
if (!s.ok()) {
return s;
}
}
assert(file_reader);
Statistics* const statistics = immutable_options.stats;
CompressionType compression_type = kNoCompression;
{
const Status s = ReadHeader(file_reader.get(), column_family_id, statistics,
&compression_type);
if (!s.ok()) {
return s;
}
}
{
const Status s = ReadFooter(file_reader.get(), file_size, statistics);
if (!s.ok()) {
return s;
}
}
blob_file_reader->reset(
new BlobFileReader(std::move(file_reader), file_size, compression_type,
immutable_options.clock, statistics));
return Status::OK();
}
Status BlobFileReader::OpenFile(
const ImmutableOptions& immutable_options, const FileOptions& file_opts,
HistogramImpl* blob_file_read_hist, uint64_t blob_file_number,
const std::shared_ptr<IOTracer>& io_tracer, uint64_t* file_size,
std::unique_ptr<RandomAccessFileReader>* file_reader) {
assert(file_size);
assert(file_reader);
const auto& cf_paths = immutable_options.cf_paths;
assert(!cf_paths.empty());
const std::string blob_file_path =
BlobFileName(cf_paths.front().path, blob_file_number);
FileSystem* const fs = immutable_options.fs.get();
assert(fs);
constexpr IODebugContext* dbg = nullptr;
{
TEST_SYNC_POINT("BlobFileReader::OpenFile:GetFileSize");
const Status s =
fs->GetFileSize(blob_file_path, IOOptions(), file_size, dbg);
if (!s.ok()) {
return s;
}
}
if (*file_size < BlobLogHeader::kSize + BlobLogFooter::kSize) {
return Status::Corruption("Malformed blob file");
}
std::unique_ptr<FSRandomAccessFile> file;
{
TEST_SYNC_POINT("BlobFileReader::OpenFile:NewRandomAccessFile");
const Status s =
fs->NewRandomAccessFile(blob_file_path, file_opts, &file, dbg);
if (!s.ok()) {
return s;
}
}
assert(file);
if (immutable_options.advise_random_on_open) {
file->Hint(FSRandomAccessFile::kRandom);
}
file_reader->reset(new RandomAccessFileReader(
std::move(file), blob_file_path, immutable_options.clock, io_tracer,
immutable_options.stats, BLOB_DB_BLOB_FILE_READ_MICROS,
blob_file_read_hist, immutable_options.rate_limiter.get(),
immutable_options.listeners));
return Status::OK();
}
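// Sanity-check arithmetic used above, as a sketch: the smallest structurally
// valid blob file is an empty one consisting of just a header and a footer,
// i.e. BlobLogHeader::kSize + BlobLogFooter::kSize bytes (30 + 32 = 62 with
// the current format), so any shorter file is rejected as corrupt before a
// reader is even constructed.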
Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
uint32_t column_family_id,
Statistics* statistics,
CompressionType* compression_type) {
assert(file_reader);
assert(compression_type);
Slice header_slice;
Buffer buf;
AlignedBuf aligned_buf;
{
TEST_SYNC_POINT("BlobFileReader::ReadHeader:ReadFromFile");
constexpr uint64_t read_offset = 0;
constexpr size_t read_size = BlobLogHeader::kSize;
// TODO: rate limit reading headers from blob files.
const Status s = ReadFromFile(file_reader, read_offset, read_size,
statistics, &header_slice, &buf, &aligned_buf,
Env::IO_TOTAL /* rate_limiter_priority */);
if (!s.ok()) {
return s;
}
TEST_SYNC_POINT_CALLBACK("BlobFileReader::ReadHeader:TamperWithResult",
&header_slice);
}
BlobLogHeader header;
{
const Status s = header.DecodeFrom(header_slice);
if (!s.ok()) {
return s;
}
}
constexpr ExpirationRange no_expiration_range;
if (header.has_ttl || header.expiration_range != no_expiration_range) {
return Status::Corruption("Unexpected TTL blob file");
}
if (header.column_family_id != column_family_id) {
return Status::Corruption("Column family ID mismatch");
}
*compression_type = header.compression;
return Status::OK();
}
Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader,
uint64_t file_size, Statistics* statistics) {
assert(file_size >= BlobLogHeader::kSize + BlobLogFooter::kSize);
assert(file_reader);
Slice footer_slice;
Buffer buf;
AlignedBuf aligned_buf;
{
TEST_SYNC_POINT("BlobFileReader::ReadFooter:ReadFromFile");
const uint64_t read_offset = file_size - BlobLogFooter::kSize;
constexpr size_t read_size = BlobLogFooter::kSize;
// TODO: rate limit reading footers from blob files.
const Status s = ReadFromFile(file_reader, read_offset, read_size,
statistics, &footer_slice, &buf, &aligned_buf,
Env::IO_TOTAL /* rate_limiter_priority */);
if (!s.ok()) {
return s;
}
TEST_SYNC_POINT_CALLBACK("BlobFileReader::ReadFooter:TamperWithResult",
&footer_slice);
}
BlobLogFooter footer;
{
const Status s = footer.DecodeFrom(footer_slice);
if (!s.ok()) {
return s;
}
}
constexpr ExpirationRange no_expiration_range;
if (footer.expiration_range != no_expiration_range) {
return Status::Corruption("Unexpected TTL blob file");
}
return Status::OK();
}
Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader,
uint64_t read_offset, size_t read_size,
Statistics* statistics, Slice* slice,
Buffer* buf, AlignedBuf* aligned_buf,
Env::IOPriority rate_limiter_priority) {
assert(slice);
assert(buf);
assert(aligned_buf);
assert(file_reader);
RecordTick(statistics, BLOB_DB_BLOB_FILE_BYTES_READ, read_size);
Status s;
if (file_reader->use_direct_io()) {
constexpr char* scratch = nullptr;
s = file_reader->Read(IOOptions(), read_offset, read_size, slice, scratch,
aligned_buf, rate_limiter_priority);
} else {
buf->reset(new char[read_size]);
constexpr AlignedBuf* aligned_scratch = nullptr;
s = file_reader->Read(IOOptions(), read_offset, read_size, slice,
buf->get(), aligned_scratch, rate_limiter_priority);
}
if (!s.ok()) {
return s;
}
if (slice->size() != read_size) {
return Status::Corruption("Failed to read data from blob file");
}
return Status::OK();
}
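// Why the two branches above differ: direct I/O bypasses the page cache and
// requires the destination buffer to satisfy alignment constraints, so the
// read goes through an AlignedBuf allocated by the reader; for buffered I/O
// a plain heap buffer suffices and the Slice may point straight into it.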
BlobFileReader::BlobFileReader(
std::unique_ptr<RandomAccessFileReader>&& file_reader, uint64_t file_size,
CompressionType compression_type, SystemClock* clock,
Statistics* statistics)
: file_reader_(std::move(file_reader)),
file_size_(file_size),
compression_type_(compression_type),
clock_(clock),
statistics_(statistics) {
assert(file_reader_);
}
BlobFileReader::~BlobFileReader() = default;
Status BlobFileReader::GetBlob(const ReadOptions& read_options,
const Slice& user_key, uint64_t offset,
uint64_t value_size,
CompressionType compression_type,
FilePrefetchBuffer* prefetch_buffer,
PinnableSlice* value,
uint64_t* bytes_read) const {
assert(value);
const uint64_t key_size = user_key.size();
if (!IsValidBlobOffset(offset, key_size, value_size, file_size_)) {
return Status::Corruption("Invalid blob offset");
}
if (compression_type != compression_type_) {
return Status::Corruption("Compression type mismatch when reading blob");
}
// Note: if verify_checksums is set, we read the entire blob record in order
// to perform the verification; otherwise, we just read the blob itself. Since
// the offset in BlobIndex actually points to the blob value, we need to make
// an adjustment in the former case.
const uint64_t adjustment =
read_options.verify_checksums
? BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size)
: 0;
assert(offset >= adjustment);
const uint64_t record_offset = offset - adjustment;
const uint64_t record_size = value_size + adjustment;
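// Worked example of the adjustment, assuming a 4-byte key and
// verify_checksums == true: adjustment = kHeaderSize + 4, so record_offset
// backs up past the key and the record header to the start of the record,
// and record_size grows by the same amount so the whole record
// (header + key + value) is read and can be checksummed.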
Slice record_slice;
Buffer buf;
AlignedBuf aligned_buf;
bool prefetched = false;
if (prefetch_buffer) {
Status s;
constexpr bool for_compaction = true;
prefetched = prefetch_buffer->TryReadFromCache(
IOOptions(), file_reader_.get(), record_offset,
static_cast<size_t>(record_size), &record_slice, &s,
read_options.rate_limiter_priority, for_compaction);
if (!s.ok()) {
return s;
}
}
if (!prefetched) {
TEST_SYNC_POINT("BlobFileReader::GetBlob:ReadFromFile");
const Status s = ReadFromFile(file_reader_.get(), record_offset,
static_cast<size_t>(record_size), statistics_,
&record_slice, &buf, &aligned_buf,
read_options.rate_limiter_priority);
if (!s.ok()) {
return s;
}
}
TEST_SYNC_POINT_CALLBACK("BlobFileReader::GetBlob:TamperWithResult",
&record_slice);
if (read_options.verify_checksums) {
const Status s = VerifyBlob(record_slice, user_key, value_size);
if (!s.ok()) {
return s;
}
}
const Slice value_slice(record_slice.data() + adjustment, value_size);
{
const Status s = UncompressBlobIfNeeded(value_slice, compression_type,
clock_, statistics_, value);
if (!s.ok()) {
return s;
}
}
if (bytes_read) {
*bytes_read = record_size;
}
return Status::OK();
}
void BlobFileReader::MultiGetBlob(
const ReadOptions& read_options,
const autovector<std::reference_wrapper<const Slice>>& user_keys,
const autovector<uint64_t>& offsets,
const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
autovector<PinnableSlice*>& values, uint64_t* bytes_read) const {
const size_t num_blobs = user_keys.size();
assert(num_blobs > 0);
assert(num_blobs == offsets.size());
assert(num_blobs == value_sizes.size());
assert(num_blobs == statuses.size());
assert(num_blobs == values.size());
#ifndef NDEBUG
for (size_t i = 0; i < offsets.size() - 1; ++i) {
assert(offsets[i] <= offsets[i + 1]);
}
#endif // !NDEBUG
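// The sorted-offsets contract (see the declaration in blob_file_reader.h)
// keeps the FSReadRequests below in ascending file order, which lets the
// underlying file system coalesce neighboring reads in MultiRead().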
std::vector<FSReadRequest> read_reqs(num_blobs);
autovector<uint64_t> adjustments;
uint64_t total_len = 0;
for (size_t i = 0; i < num_blobs; ++i) {
const size_t key_size = user_keys[i].get().size();
assert(IsValidBlobOffset(offsets[i], key_size, value_sizes[i], file_size_));
const uint64_t adjustment =
read_options.verify_checksums
? BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size)
: 0;
assert(offsets[i] >= adjustment);
adjustments.push_back(adjustment);
read_reqs[i].offset = offsets[i] - adjustment;
read_reqs[i].len = value_sizes[i] + adjustment;
total_len += read_reqs[i].len;
}
RecordTick(statistics_, BLOB_DB_BLOB_FILE_BYTES_READ, total_len);
Buffer buf;
AlignedBuf aligned_buf;
Status s;
bool direct_io = file_reader_->use_direct_io();
if (direct_io) {
for (size_t i = 0; i < read_reqs.size(); ++i) {
read_reqs[i].scratch = nullptr;
}
} else {
buf.reset(new char[total_len]);
std::ptrdiff_t pos = 0;
for (size_t i = 0; i < read_reqs.size(); ++i) {
read_reqs[i].scratch = buf.get() + pos;
pos += read_reqs[i].len;
}
}
TEST_SYNC_POINT("BlobFileReader::MultiGetBlob:ReadFromFile");
s = file_reader_->MultiRead(IOOptions(), read_reqs.data(), read_reqs.size(),
direct_io ? &aligned_buf : nullptr,
read_options.rate_limiter_priority);
if (!s.ok()) {
for (auto& req : read_reqs) {
req.status.PermitUncheckedError();
}
for (size_t i = 0; i < num_blobs; ++i) {
assert(statuses[i]);
*statuses[i] = s;
}
return;
}
assert(s.ok());
for (size_t i = 0; i < num_blobs; ++i) {
auto& req = read_reqs[i];
assert(statuses[i]);
if (req.status.ok() && req.result.size() != req.len) {
req.status = IOStatus::Corruption("Failed to read data from blob file");
}
*statuses[i] = req.status;
}
if (read_options.verify_checksums) {
for (size_t i = 0; i < num_blobs; ++i) {
assert(statuses[i]);
if (!statuses[i]->ok()) {
continue;
}
const Slice& record_slice = read_reqs[i].result;
s = VerifyBlob(record_slice, user_keys[i], value_sizes[i]);
if (!s.ok()) {
assert(statuses[i]);
*statuses[i] = s;
}
}
}
for (size_t i = 0; i < num_blobs; ++i) {
assert(statuses[i]);
if (!statuses[i]->ok()) {
continue;
}
const Slice& record_slice = read_reqs[i].result;
const Slice value_slice(record_slice.data() + adjustments[i],
value_sizes[i]);
s = UncompressBlobIfNeeded(value_slice, compression_type_, clock_,
statistics_, values[i]);
if (!s.ok()) {
*statuses[i] = s;
}
}
if (bytes_read) {
uint64_t total_bytes = 0;
for (const auto& req : read_reqs) {
total_bytes += req.result.size();
}
*bytes_read = total_bytes;
}
}
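// Error-propagation note for the batch path above: if MultiRead() itself
// fails, each per-request status is first marked as checked via
// PermitUncheckedError() and the batch-level status is copied into every
// output slot; otherwise each request carries its own status, with a short
// read surfaced as Corruption for just that blob.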
Status BlobFileReader::VerifyBlob(const Slice& record_slice,
const Slice& user_key, uint64_t value_size) {
BlobLogRecord record;
const Slice header_slice(record_slice.data(), BlobLogRecord::kHeaderSize);
{
const Status s = record.DecodeHeaderFrom(header_slice);
if (!s.ok()) {
return s;
}
}
if (record.key_size != user_key.size()) {
return Status::Corruption("Key size mismatch when reading blob");
}
if (record.value_size != value_size) {
return Status::Corruption("Value size mismatch when reading blob");
}
record.key =
Slice(record_slice.data() + BlobLogRecord::kHeaderSize, record.key_size);
if (record.key != user_key) {
return Status::Corruption("Key mismatch when reading blob");
}
record.value = Slice(record.key.data() + record.key_size, value_size);
{
TEST_SYNC_POINT_CALLBACK("BlobFileReader::VerifyBlob:CheckBlobCRC",
&record);
const Status s = record.CheckBlobCRC();
if (!s.ok()) {
return s;
}
}
return Status::OK();
}
Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
CompressionType compression_type,
SystemClock* clock,
Statistics* statistics,
PinnableSlice* value) {
assert(value);
if (compression_type == kNoCompression) {
SaveValue(value_slice, value);
return Status::OK();
}
UncompressionContext context(compression_type);
UncompressionInfo info(context, UncompressionDict::GetEmptyDict(),
compression_type);
size_t uncompressed_size = 0;
constexpr uint32_t compression_format_version = 2;
constexpr MemoryAllocator* allocator = nullptr;
CacheAllocationPtr output;
{
StopWatch stop_watch(clock, statistics, BLOB_DB_DECOMPRESSION_MICROS);
output = UncompressData(info, value_slice.data(), value_slice.size(),
&uncompressed_size, compression_format_version,
allocator);
}
TEST_SYNC_POINT_CALLBACK(
"BlobFileReader::UncompressBlobIfNeeded:TamperWithResult", &output);
if (!output) {
return Status::Corruption("Unable to uncompress blob");
}
SaveValue(Slice(output.get(), uncompressed_size), value);
return Status::OK();
}
void BlobFileReader::SaveValue(const Slice& src, PinnableSlice* dst) {
assert(dst);
if (dst->IsPinned()) {
dst->Reset();
}
dst->PinSelf(src);
}
} // namespace ROCKSDB_NAMESPACE

View File

@@ -1,107 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cinttypes>
#include <memory>
#include "file/random_access_file_reader.h"
#include "rocksdb/compression_type.h"
#include "rocksdb/rocksdb_namespace.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
class Status;
struct ImmutableOptions;
struct FileOptions;
class HistogramImpl;
struct ReadOptions;
class Slice;
class FilePrefetchBuffer;
class PinnableSlice;
class Statistics;
class BlobFileReader {
public:
static Status Create(const ImmutableOptions& immutable_options,
const FileOptions& file_options,
uint32_t column_family_id,
HistogramImpl* blob_file_read_hist,
uint64_t blob_file_number,
const std::shared_ptr<IOTracer>& io_tracer,
std::unique_ptr<BlobFileReader>* reader);
BlobFileReader(const BlobFileReader&) = delete;
BlobFileReader& operator=(const BlobFileReader&) = delete;
~BlobFileReader();
Status GetBlob(const ReadOptions& read_options, const Slice& user_key,
uint64_t offset, uint64_t value_size,
CompressionType compression_type,
FilePrefetchBuffer* prefetch_buffer, PinnableSlice* value,
uint64_t* bytes_read) const;
// offsets must be sorted in ascending order by caller.
void MultiGetBlob(
const ReadOptions& read_options,
const autovector<std::reference_wrapper<const Slice>>& user_keys,
const autovector<uint64_t>& offsets,
const autovector<uint64_t>& value_sizes, autovector<Status*>& statuses,
autovector<PinnableSlice*>& values, uint64_t* bytes_read) const;
CompressionType GetCompressionType() const { return compression_type_; }
uint64_t GetFileSize() const { return file_size_; }
private:
BlobFileReader(std::unique_ptr<RandomAccessFileReader>&& file_reader,
uint64_t file_size, CompressionType compression_type,
SystemClock* clock, Statistics* statistics);
static Status OpenFile(const ImmutableOptions& immutable_options,
const FileOptions& file_opts,
HistogramImpl* blob_file_read_hist,
uint64_t blob_file_number,
const std::shared_ptr<IOTracer>& io_tracer,
uint64_t* file_size,
std::unique_ptr<RandomAccessFileReader>* file_reader);
static Status ReadHeader(const RandomAccessFileReader* file_reader,
uint32_t column_family_id, Statistics* statistics,
CompressionType* compression_type);
static Status ReadFooter(const RandomAccessFileReader* file_reader,
uint64_t file_size, Statistics* statistics);
using Buffer = std::unique_ptr<char[]>;
static Status ReadFromFile(const RandomAccessFileReader* file_reader,
uint64_t read_offset, size_t read_size,
Statistics* statistics, Slice* slice, Buffer* buf,
AlignedBuf* aligned_buf,
Env::IOPriority rate_limiter_priority);
static Status VerifyBlob(const Slice& record_slice, const Slice& user_key,
uint64_t value_size);
static Status UncompressBlobIfNeeded(const Slice& value_slice,
CompressionType compression_type,
SystemClock* clock,
Statistics* statistics,
PinnableSlice* value);
static void SaveValue(const Slice& src, PinnableSlice* dst);
std::unique_ptr<RandomAccessFileReader> file_reader_;
uint64_t file_size_;
CompressionType compression_type_;
SystemClock* clock_;
Statistics* statistics_;
};
} // namespace ROCKSDB_NAMESPACE

View File

@@ -1,974 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_file_reader.h"
#include <cassert>
#include <string>
#include "db/blob/blob_log_format.h"
#include "db/blob/blob_log_writer.h"
#include "env/mock_env.h"
#include "file/filename.h"
#include "file/read_write_util.h"
#include "file/writable_file_writer.h"
#include "options/cf_options.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/options.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "util/compression.h"
#include "utilities/fault_injection_env.h"
namespace ROCKSDB_NAMESPACE {
namespace {
// Creates a test blob file with `num` blobs in it.
void WriteBlobFile(const ImmutableOptions& immutable_options,
uint32_t column_family_id, bool has_ttl,
const ExpirationRange& expiration_range_header,
const ExpirationRange& expiration_range_footer,
uint64_t blob_file_number, const std::vector<Slice>& keys,
const std::vector<Slice>& blobs, CompressionType compression,
std::vector<uint64_t>& blob_offsets,
std::vector<uint64_t>& blob_sizes) {
assert(!immutable_options.cf_paths.empty());
size_t num = keys.size();
assert(num == blobs.size());
assert(num == blob_offsets.size());
assert(num == blob_sizes.size());
const std::string blob_file_path =
BlobFileName(immutable_options.cf_paths.front().path, blob_file_number);
std::unique_ptr<FSWritableFile> file;
ASSERT_OK(NewWritableFile(immutable_options.fs.get(), blob_file_path, &file,
FileOptions()));
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(file), blob_file_path, FileOptions(), immutable_options.clock));
constexpr Statistics* statistics = nullptr;
constexpr bool use_fsync = false;
constexpr bool do_flush = false;
BlobLogWriter blob_log_writer(std::move(file_writer), immutable_options.clock,
statistics, blob_file_number, use_fsync,
do_flush);
BlobLogHeader header(column_family_id, compression, has_ttl,
expiration_range_header);
ASSERT_OK(blob_log_writer.WriteHeader(header));
std::vector<std::string> compressed_blobs(num);
std::vector<Slice> blobs_to_write(num);
if (kNoCompression == compression) {
for (size_t i = 0; i < num; ++i) {
blobs_to_write[i] = blobs[i];
blob_sizes[i] = blobs[i].size();
}
} else {
CompressionOptions opts;
CompressionContext context(compression);
constexpr uint64_t sample_for_compression = 0;
CompressionInfo info(opts, context, CompressionDict::GetEmptyDict(),
compression, sample_for_compression);
constexpr uint32_t compression_format_version = 2;
for (size_t i = 0; i < num; ++i) {
ASSERT_TRUE(CompressData(blobs[i], info, compression_format_version,
&compressed_blobs[i]));
blobs_to_write[i] = compressed_blobs[i];
blob_sizes[i] = compressed_blobs[i].size();
}
}
for (size_t i = 0; i < num; ++i) {
uint64_t key_offset = 0;
ASSERT_OK(blob_log_writer.AddRecord(keys[i], blobs_to_write[i], &key_offset,
&blob_offsets[i]));
}
BlobLogFooter footer;
footer.blob_count = num;
footer.expiration_range = expiration_range_footer;
std::string checksum_method;
std::string checksum_value;
ASSERT_OK(
blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value));
}
// Creates a test blob file with a single blob in it. Note: this overload
// makes it possible to test various corner cases by letting the caller
// specify the contents of the blob file header/footer fields.
void WriteBlobFile(const ImmutableOptions& immutable_options,
uint32_t column_family_id, bool has_ttl,
const ExpirationRange& expiration_range_header,
const ExpirationRange& expiration_range_footer,
uint64_t blob_file_number, const Slice& key,
const Slice& blob, CompressionType compression,
uint64_t* blob_offset, uint64_t* blob_size) {
std::vector<Slice> keys{key};
std::vector<Slice> blobs{blob};
std::vector<uint64_t> blob_offsets{0};
std::vector<uint64_t> blob_sizes{0};
WriteBlobFile(immutable_options, column_family_id, has_ttl,
expiration_range_header, expiration_range_footer,
blob_file_number, keys, blobs, compression, blob_offsets,
blob_sizes);
if (blob_offset) {
*blob_offset = blob_offsets[0];
}
if (blob_size) {
*blob_size = blob_sizes[0];
}
}
} // anonymous namespace
class BlobFileReaderTest : public testing::Test {
protected:
BlobFileReaderTest() { mock_env_.reset(MockEnv::Create(Env::Default())); }
std::unique_ptr<Env> mock_env_;
};
TEST_F(BlobFileReaderTest, CreateReaderAndGetBlob) {
Options options;
options.env = mock_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileReaderTest_CreateReaderAndGetBlob"),
0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
constexpr uint64_t blob_file_number = 1;
constexpr size_t num_blobs = 3;
const std::vector<std::string> key_strs = {"key1", "key2", "key3"};
const std::vector<std::string> blob_strs = {"blob1", "blob2", "blob3"};
const std::vector<Slice> keys = {key_strs[0], key_strs[1], key_strs[2]};
const std::vector<Slice> blobs = {blob_strs[0], blob_strs[1], blob_strs[2]};
std::vector<uint64_t> blob_offsets(keys.size());
std::vector<uint64_t> blob_sizes(keys.size());
WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range,
expiration_range, blob_file_number, keys, blobs, kNoCompression,
blob_offsets, blob_sizes);
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
ASSERT_OK(BlobFileReader::Create(
immutable_options, FileOptions(), column_family_id, blob_file_read_hist,
blob_file_number, nullptr /*IOTracer*/, &reader));
// Make sure the blob can be retrieved with and without checksum verification
ReadOptions read_options;
read_options.verify_checksums = false;
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
{
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_OK(reader->GetBlob(read_options, keys[0], blob_offsets[0],
blob_sizes[0], kNoCompression, prefetch_buffer,
&value, &bytes_read));
ASSERT_EQ(value, blobs[0]);
ASSERT_EQ(bytes_read, blob_sizes[0]);
// MultiGetBlob
bytes_read = 0;
size_t total_size = 0;
autovector<std::reference_wrapper<const Slice>> key_refs;
for (const auto& key_ref : keys) {
key_refs.emplace_back(std::cref(key_ref));
}
autovector<uint64_t> offsets{blob_offsets[0], blob_offsets[1],
blob_offsets[2]};
autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]};
std::array<Status, num_blobs> statuses_buf;
autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
&statuses_buf[2]};
std::array<PinnableSlice, num_blobs> value_buf;
autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
&value_buf[2]};
reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
values, &bytes_read);
for (size_t i = 0; i < num_blobs; ++i) {
ASSERT_OK(statuses_buf[i]);
ASSERT_EQ(value_buf[i], blobs[i]);
total_size += blob_sizes[i];
}
ASSERT_EQ(bytes_read, total_size);
}
read_options.verify_checksums = true;
{
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_OK(reader->GetBlob(read_options, keys[1], blob_offsets[1],
blob_sizes[1], kNoCompression, prefetch_buffer,
&value, &bytes_read));
ASSERT_EQ(value, blobs[1]);
const uint64_t key_size = keys[1].size();
ASSERT_EQ(bytes_read,
BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) +
blob_sizes[1]);
}
// Invalid offset (too close to start of file)
{
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, keys[0], blob_offsets[0] - 1,
blob_sizes[0], kNoCompression, prefetch_buffer,
&value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
}
// Invalid offset (too close to end of file)
{
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, keys[2], blob_offsets[2] + 1,
blob_sizes[2], kNoCompression, prefetch_buffer,
&value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
}
// Incorrect compression type
{
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, keys[0], blob_offsets[0],
blob_sizes[0], kZSTD, prefetch_buffer, &value,
&bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
}
// Incorrect key size
{
constexpr char shorter_key[] = "k";
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, shorter_key,
blob_offsets[0] -
(keys[0].size() - sizeof(shorter_key) + 1),
blob_sizes[0], kNoCompression, prefetch_buffer,
&value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
// MultiGetBlob
autovector<std::reference_wrapper<const Slice>> key_refs;
for (const auto& key_ref : keys) {
key_refs.emplace_back(std::cref(key_ref));
}
Slice shorter_key_slice(shorter_key, sizeof(shorter_key) - 1);
key_refs[1] = std::cref(shorter_key_slice);
autovector<uint64_t> offsets{
blob_offsets[0],
blob_offsets[1] - (keys[1].size() - key_refs[1].get().size()),
blob_offsets[2]};
autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]};
std::array<Status, num_blobs> statuses_buf;
autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
&statuses_buf[2]};
std::array<PinnableSlice, num_blobs> value_buf;
autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
&value_buf[2]};
reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
values, &bytes_read);
for (size_t i = 0; i < num_blobs; ++i) {
if (i == 1) {
ASSERT_TRUE(statuses_buf[i].IsCorruption());
} else {
ASSERT_OK(statuses_buf[i]);
}
}
}
// Incorrect key
{
constexpr char incorrect_key[] = "foo1";
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, incorrect_key, blob_offsets[0],
blob_sizes[0], kNoCompression, prefetch_buffer,
&value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
// MultiGetBlob
autovector<std::reference_wrapper<const Slice>> key_refs;
for (const auto& key_ref : keys) {
key_refs.emplace_back(std::cref(key_ref));
}
Slice wrong_key_slice(incorrect_key, sizeof(incorrect_key) - 1);
key_refs[2] = std::cref(wrong_key_slice);
autovector<uint64_t> offsets{blob_offsets[0], blob_offsets[1],
blob_offsets[2]};
autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1], blob_sizes[2]};
std::array<Status, num_blobs> statuses_buf;
autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
&statuses_buf[2]};
std::array<PinnableSlice, num_blobs> value_buf;
autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
&value_buf[2]};
reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
values, &bytes_read);
for (size_t i = 0; i < num_blobs; ++i) {
if (i == num_blobs - 1) {
ASSERT_TRUE(statuses_buf[i].IsCorruption());
} else {
ASSERT_OK(statuses_buf[i]);
}
}
}
// Incorrect value size
{
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(read_options, keys[1], blob_offsets[1],
blob_sizes[1] + 1, kNoCompression,
prefetch_buffer, &value, &bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
// MultiGetBlob
autovector<std::reference_wrapper<const Slice>> key_refs;
for (const auto& key_ref : keys) {
key_refs.emplace_back(std::cref(key_ref));
}
autovector<uint64_t> offsets{blob_offsets[0], blob_offsets[1],
blob_offsets[2]};
autovector<uint64_t> sizes{blob_sizes[0], blob_sizes[1] + 1, blob_sizes[2]};
std::array<Status, num_blobs> statuses_buf;
autovector<Status*> statuses{&statuses_buf[0], &statuses_buf[1],
&statuses_buf[2]};
std::array<PinnableSlice, num_blobs> value_buf;
autovector<PinnableSlice*> values{&value_buf[0], &value_buf[1],
&value_buf[2]};
reader->MultiGetBlob(read_options, key_refs, offsets, sizes, statuses,
values, &bytes_read);
for (size_t i = 0; i < num_blobs; ++i) {
if (i != 1) {
ASSERT_OK(statuses_buf[i]);
} else {
ASSERT_TRUE(statuses_buf[i].IsCorruption());
}
}
}
}
TEST_F(BlobFileReaderTest, Malformed) {
// Write a blob file consisting of nothing but a header, and make sure we
// detect the error when we open it for reading
Options options;
options.env = mock_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_Malformed"),
0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr uint64_t blob_file_number = 1;
{
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
const std::string blob_file_path =
BlobFileName(immutable_options.cf_paths.front().path, blob_file_number);
std::unique_ptr<FSWritableFile> file;
ASSERT_OK(NewWritableFile(immutable_options.fs.get(), blob_file_path, &file,
FileOptions()));
std::unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), blob_file_path, FileOptions(),
immutable_options.clock));
constexpr Statistics* statistics = nullptr;
constexpr bool use_fsync = false;
constexpr bool do_flush = false;
BlobLogWriter blob_log_writer(std::move(file_writer),
immutable_options.clock, statistics,
blob_file_number, use_fsync, do_flush);
BlobLogHeader header(column_family_id, kNoCompression, has_ttl,
expiration_range);
ASSERT_OK(blob_log_writer.WriteHeader(header));
}
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
ASSERT_TRUE(BlobFileReader::Create(immutable_options, FileOptions(),
column_family_id, blob_file_read_hist,
blob_file_number, nullptr /*IOTracer*/,
&reader)
.IsCorruption());
}
TEST_F(BlobFileReaderTest, TTL) {
Options options;
options.env = mock_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_TTL"), 0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr bool has_ttl = true;
constexpr ExpirationRange expiration_range;
constexpr uint64_t blob_file_number = 1;
constexpr char key[] = "key";
constexpr char blob[] = "blob";
uint64_t blob_offset = 0;
uint64_t blob_size = 0;
WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range,
expiration_range, blob_file_number, key, blob, kNoCompression,
&blob_offset, &blob_size);
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
ASSERT_TRUE(BlobFileReader::Create(immutable_options, FileOptions(),
column_family_id, blob_file_read_hist,
blob_file_number, nullptr /*IOTracer*/,
&reader)
.IsCorruption());
}
TEST_F(BlobFileReaderTest, ExpirationRangeInHeader) {
Options options;
options.env = mock_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileReaderTest_ExpirationRangeInHeader"),
0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr bool has_ttl = false;
const ExpirationRange expiration_range_header(
1, 2); // can be made constexpr when we adopt C++14
constexpr ExpirationRange expiration_range_footer;
constexpr uint64_t blob_file_number = 1;
constexpr char key[] = "key";
constexpr char blob[] = "blob";
uint64_t blob_offset = 0;
uint64_t blob_size = 0;
WriteBlobFile(immutable_options, column_family_id, has_ttl,
expiration_range_header, expiration_range_footer,
blob_file_number, key, blob, kNoCompression, &blob_offset,
&blob_size);
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
ASSERT_TRUE(BlobFileReader::Create(immutable_options, FileOptions(),
column_family_id, blob_file_read_hist,
blob_file_number, nullptr /*IOTracer*/,
&reader)
.IsCorruption());
}
TEST_F(BlobFileReaderTest, ExpirationRangeInFooter) {
Options options;
options.env = mock_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileReaderTest_ExpirationRangeInFooter"),
0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range_header;
const ExpirationRange expiration_range_footer(
1, 2); // can be made constexpr when we adopt C++14
constexpr uint64_t blob_file_number = 1;
constexpr char key[] = "key";
constexpr char blob[] = "blob";
uint64_t blob_offset = 0;
uint64_t blob_size = 0;
WriteBlobFile(immutable_options, column_family_id, has_ttl,
expiration_range_header, expiration_range_footer,
blob_file_number, key, blob, kNoCompression, &blob_offset,
&blob_size);
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
ASSERT_TRUE(BlobFileReader::Create(immutable_options, FileOptions(),
column_family_id, blob_file_read_hist,
blob_file_number, nullptr /*IOTracer*/,
&reader)
.IsCorruption());
}
TEST_F(BlobFileReaderTest, IncorrectColumnFamily) {
Options options;
options.env = mock_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileReaderTest_IncorrectColumnFamily"),
0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
constexpr uint64_t blob_file_number = 1;
constexpr char key[] = "key";
constexpr char blob[] = "blob";
uint64_t blob_offset = 0;
uint64_t blob_size = 0;
WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range,
expiration_range, blob_file_number, key, blob, kNoCompression,
&blob_offset, &blob_size);
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
constexpr uint32_t incorrect_column_family_id = 2;
ASSERT_TRUE(BlobFileReader::Create(immutable_options, FileOptions(),
incorrect_column_family_id,
blob_file_read_hist, blob_file_number,
nullptr /*IOTracer*/, &reader)
.IsCorruption());
}
TEST_F(BlobFileReaderTest, BlobCRCError) {
Options options;
options.env = mock_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_BlobCRCError"),
0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
constexpr uint64_t blob_file_number = 1;
constexpr char key[] = "key";
constexpr char blob[] = "blob";
uint64_t blob_offset = 0;
uint64_t blob_size = 0;
WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range,
expiration_range, blob_file_number, key, blob, kNoCompression,
&blob_offset, &blob_size);
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
ASSERT_OK(BlobFileReader::Create(
immutable_options, FileOptions(), column_family_id, blob_file_read_hist,
blob_file_number, nullptr /*IOTracer*/, &reader));
SyncPoint::GetInstance()->SetCallBack(
"BlobFileReader::VerifyBlob:CheckBlobCRC", [](void* arg) {
BlobLogRecord* const record = static_cast<BlobLogRecord*>(arg);
assert(record);
record->blob_crc = 0xfaceb00c;
});
SyncPoint::GetInstance()->EnableProcessing();
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(ReadOptions(), key, blob_offset, blob_size,
kNoCompression, prefetch_buffer, &value,
&bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
TEST_F(BlobFileReaderTest, Compression) {
if (!Snappy_Supported()) {
return;
}
Options options;
options.env = mock_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(), "BlobFileReaderTest_Compression"),
0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
constexpr uint64_t blob_file_number = 1;
constexpr char key[] = "key";
constexpr char blob[] = "blob";
uint64_t blob_offset = 0;
uint64_t blob_size = 0;
WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range,
expiration_range, blob_file_number, key, blob,
kSnappyCompression, &blob_offset, &blob_size);
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
ASSERT_OK(BlobFileReader::Create(
immutable_options, FileOptions(), column_family_id, blob_file_read_hist,
blob_file_number, nullptr /*IOTracer*/, &reader));
// Make sure the blob can be retrieved with and without checksum verification
ReadOptions read_options;
read_options.verify_checksums = false;
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
{
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_OK(reader->GetBlob(read_options, key, blob_offset, blob_size,
kSnappyCompression, prefetch_buffer, &value,
&bytes_read));
ASSERT_EQ(value, blob);
ASSERT_EQ(bytes_read, blob_size);
}
read_options.verify_checksums = true;
{
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_OK(reader->GetBlob(read_options, key, blob_offset, blob_size,
kSnappyCompression, prefetch_buffer, &value,
&bytes_read));
ASSERT_EQ(value, blob);
constexpr uint64_t key_size = sizeof(key) - 1;
ASSERT_EQ(bytes_read,
BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) +
blob_size);
}
}
TEST_F(BlobFileReaderTest, UncompressionError) {
if (!Snappy_Supported()) {
return;
}
Options options;
options.env = mock_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileReaderTest_UncompressionError"),
0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
constexpr uint64_t blob_file_number = 1;
constexpr char key[] = "key";
constexpr char blob[] = "blob";
uint64_t blob_offset = 0;
uint64_t blob_size = 0;
WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range,
expiration_range, blob_file_number, key, blob,
kSnappyCompression, &blob_offset, &blob_size);
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
ASSERT_OK(BlobFileReader::Create(
immutable_options, FileOptions(), column_family_id, blob_file_read_hist,
blob_file_number, nullptr /*IOTracer*/, &reader));
SyncPoint::GetInstance()->SetCallBack(
"BlobFileReader::UncompressBlobIfNeeded:TamperWithResult", [](void* arg) {
CacheAllocationPtr* const output =
static_cast<CacheAllocationPtr*>(arg);
assert(output);
output->reset();
});
SyncPoint::GetInstance()->EnableProcessing();
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(ReadOptions(), key, blob_offset, blob_size,
kSnappyCompression, prefetch_buffer, &value,
&bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
class BlobFileReaderIOErrorTest
: public testing::Test,
public testing::WithParamInterface<std::string> {
protected:
BlobFileReaderIOErrorTest() : sync_point_(GetParam()) {
mock_env_.reset(MockEnv::Create(Env::Default()));
fault_injection_env_.reset(new FaultInjectionTestEnv(mock_env_.get()));
}
std::unique_ptr<Env> mock_env_;
std::unique_ptr<FaultInjectionTestEnv> fault_injection_env_;
std::string sync_point_;
};
INSTANTIATE_TEST_CASE_P(BlobFileReaderTest, BlobFileReaderIOErrorTest,
::testing::ValuesIn(std::vector<std::string>{
"BlobFileReader::OpenFile:GetFileSize",
"BlobFileReader::OpenFile:NewRandomAccessFile",
"BlobFileReader::ReadHeader:ReadFromFile",
"BlobFileReader::ReadFooter:ReadFromFile",
"BlobFileReader::GetBlob:ReadFromFile"}));
TEST_P(BlobFileReaderIOErrorTest, IOError) {
// Simulates an I/O error during the specified step
Options options;
options.env = fault_injection_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(fault_injection_env_.get(),
"BlobFileReaderIOErrorTest_IOError"),
0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
constexpr uint64_t blob_file_number = 1;
constexpr char key[] = "key";
constexpr char blob[] = "blob";
uint64_t blob_offset = 0;
uint64_t blob_size = 0;
WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range,
expiration_range, blob_file_number, key, blob, kNoCompression,
&blob_offset, &blob_size);
SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
fault_injection_env_->SetFilesystemActive(false,
Status::IOError(sync_point_));
});
SyncPoint::GetInstance()->EnableProcessing();
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
const Status s = BlobFileReader::Create(
immutable_options, FileOptions(), column_family_id, blob_file_read_hist,
blob_file_number, nullptr /*IOTracer*/, &reader);
const bool fail_during_create =
(sync_point_ != "BlobFileReader::GetBlob:ReadFromFile");
if (fail_during_create) {
ASSERT_TRUE(s.IsIOError());
} else {
ASSERT_OK(s);
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(ReadOptions(), key, blob_offset, blob_size,
kNoCompression, prefetch_buffer, &value,
&bytes_read)
.IsIOError());
ASSERT_EQ(bytes_read, 0);
}
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
class BlobFileReaderDecodingErrorTest
: public testing::Test,
public testing::WithParamInterface<std::string> {
protected:
BlobFileReaderDecodingErrorTest() : sync_point_(GetParam()) {
mock_env_.reset(MockEnv::Create(Env::Default()));
}
std::unique_ptr<Env> mock_env_;
std::string sync_point_;
};
INSTANTIATE_TEST_CASE_P(BlobFileReaderTest, BlobFileReaderDecodingErrorTest,
::testing::ValuesIn(std::vector<std::string>{
"BlobFileReader::ReadHeader:TamperWithResult",
"BlobFileReader::ReadFooter:TamperWithResult",
"BlobFileReader::GetBlob:TamperWithResult"}));
TEST_P(BlobFileReaderDecodingErrorTest, DecodingError) {
Options options;
options.env = mock_env_.get();
options.cf_paths.emplace_back(
test::PerThreadDBPath(mock_env_.get(),
"BlobFileReaderDecodingErrorTest_DecodingError"),
0);
options.enable_blob_files = true;
ImmutableOptions immutable_options(options);
constexpr uint32_t column_family_id = 1;
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;
constexpr uint64_t blob_file_number = 1;
constexpr char key[] = "key";
constexpr char blob[] = "blob";
uint64_t blob_offset = 0;
uint64_t blob_size = 0;
WriteBlobFile(immutable_options, column_family_id, has_ttl, expiration_range,
expiration_range, blob_file_number, key, blob, kNoCompression,
&blob_offset, &blob_size);
SyncPoint::GetInstance()->SetCallBack(sync_point_, [](void* arg) {
Slice* const slice = static_cast<Slice*>(arg);
assert(slice);
assert(!slice->empty());
slice->remove_prefix(1);
});
SyncPoint::GetInstance()->EnableProcessing();
constexpr HistogramImpl* blob_file_read_hist = nullptr;
std::unique_ptr<BlobFileReader> reader;
const Status s = BlobFileReader::Create(
immutable_options, FileOptions(), column_family_id, blob_file_read_hist,
blob_file_number, nullptr /*IOTracer*/, &reader);
const bool fail_during_create =
sync_point_ != "BlobFileReader::GetBlob:TamperWithResult";
if (fail_during_create) {
ASSERT_TRUE(s.IsCorruption());
} else {
ASSERT_OK(s);
constexpr FilePrefetchBuffer* prefetch_buffer = nullptr;
PinnableSlice value;
uint64_t bytes_read = 0;
ASSERT_TRUE(reader
->GetBlob(ReadOptions(), key, blob_offset, blob_size,
kNoCompression, prefetch_buffer, &value,
&bytes_read)
.IsCorruption());
ASSERT_EQ(bytes_read, 0);
}
SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks();
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@ -1,100 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_garbage_meter.h"
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/dbformat.h"
namespace ROCKSDB_NAMESPACE {
Status BlobGarbageMeter::ProcessInFlow(const Slice& key, const Slice& value) {
uint64_t blob_file_number = kInvalidBlobFileNumber;
uint64_t bytes = 0;
const Status s = Parse(key, value, &blob_file_number, &bytes);
if (!s.ok()) {
return s;
}
if (blob_file_number == kInvalidBlobFileNumber) {
return Status::OK();
}
flows_[blob_file_number].AddInFlow(bytes);
return Status::OK();
}
Status BlobGarbageMeter::ProcessOutFlow(const Slice& key, const Slice& value) {
uint64_t blob_file_number = kInvalidBlobFileNumber;
uint64_t bytes = 0;
const Status s = Parse(key, value, &blob_file_number, &bytes);
if (!s.ok()) {
return s;
}
if (blob_file_number == kInvalidBlobFileNumber) {
return Status::OK();
}
// Note: in order to measure the amount of additional garbage, we only need to
// track the outflow for preexisting files, i.e. those that also had inflow.
// (Newly written files would only have outflow.)
auto it = flows_.find(blob_file_number);
if (it == flows_.end()) {
return Status::OK();
}
it->second.AddOutFlow(bytes);
return Status::OK();
}
Status BlobGarbageMeter::Parse(const Slice& key, const Slice& value,
uint64_t* blob_file_number, uint64_t* bytes) {
assert(blob_file_number);
assert(*blob_file_number == kInvalidBlobFileNumber);
assert(bytes);
assert(*bytes == 0);
ParsedInternalKey ikey;
{
constexpr bool log_err_key = false;
const Status s = ParseInternalKey(key, &ikey, log_err_key);
if (!s.ok()) {
return s;
}
}
if (ikey.type != kTypeBlobIndex) {
return Status::OK();
}
BlobIndex blob_index;
{
const Status s = blob_index.DecodeFrom(value);
if (!s.ok()) {
return s;
}
}
if (blob_index.IsInlined() || blob_index.HasTTL()) {
return Status::Corruption("Unexpected TTL/inlined blob index");
}
*blob_file_number = blob_index.file_number();
*bytes =
blob_index.size() +
BlobLogRecord::CalculateAdjustmentForRecordHeader(ikey.user_key.size());
return Status::OK();
}
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,102 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cassert>
#include <cstdint>
#include <unordered_map>
#include "db/blob/blob_constants.h"
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
class Slice;
// A class that can be used to compute the amount of additional garbage
// generated by a compaction. It parses the keys and blob references in the
// input and output of a compaction, and aggregates the "inflow" and "outflow"
// on a per-blob file basis. The amount of additional garbage for any given blob
// file can then be computed by subtracting the outflow from the inflow.
class BlobGarbageMeter {
public:
// A class to store the number and total size of blobs on a per-blob file
// basis.
class BlobStats {
public:
void Add(uint64_t bytes) {
++count_;
bytes_ += bytes;
}
void Add(uint64_t count, uint64_t bytes) {
count_ += count;
bytes_ += bytes;
}
uint64_t GetCount() const { return count_; }
uint64_t GetBytes() const { return bytes_; }
private:
uint64_t count_ = 0;
uint64_t bytes_ = 0;
};
// A class to keep track of the "inflow" and the "outflow" and to compute the
// amount of additional garbage for a given blob file.
class BlobInOutFlow {
public:
void AddInFlow(uint64_t bytes) {
in_flow_.Add(bytes);
assert(IsValid());
}
void AddOutFlow(uint64_t bytes) {
out_flow_.Add(bytes);
assert(IsValid());
}
const BlobStats& GetInFlow() const { return in_flow_; }
const BlobStats& GetOutFlow() const { return out_flow_; }
bool IsValid() const {
return in_flow_.GetCount() >= out_flow_.GetCount() &&
in_flow_.GetBytes() >= out_flow_.GetBytes();
}
bool HasGarbage() const {
assert(IsValid());
return in_flow_.GetCount() > out_flow_.GetCount();
}
uint64_t GetGarbageCount() const {
assert(IsValid());
assert(HasGarbage());
return in_flow_.GetCount() - out_flow_.GetCount();
}
uint64_t GetGarbageBytes() const {
assert(IsValid());
assert(HasGarbage());
return in_flow_.GetBytes() - out_flow_.GetBytes();
}
private:
BlobStats in_flow_;
BlobStats out_flow_;
};
Status ProcessInFlow(const Slice& key, const Slice& value);
Status ProcessOutFlow(const Slice& key, const Slice& value);
const std::unordered_map<uint64_t, BlobInOutFlow>& flows() const {
return flows_;
}
private:
static Status Parse(const Slice& key, const Slice& value,
uint64_t* blob_file_number, uint64_t* bytes);
std::unordered_map<uint64_t, BlobInOutFlow> flows_;
};
} // namespace ROCKSDB_NAMESPACE
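A short sketch of how a compaction pass might drive the meter, assuming hypothetical `input` and `output` containers of internal-key/value pairs like those built in the test below; the additional garbage per blob file is simply the inflow minus the outflow:

// Sketch only: `input` and `output` are hypothetical containers of
// std::pair<Slice, Slice> (internal key, value).
BlobGarbageMeter meter;
for (const auto& kv : input) {
  Status s = meter.ProcessInFlow(kv.first, kv.second);
  if (!s.ok()) {
    // handle corruption
  }
}
for (const auto& kv : output) {
  Status s = meter.ProcessOutFlow(kv.first, kv.second);
  if (!s.ok()) {
    // handle corruption
  }
}
for (const auto& pair : meter.flows()) {
  const BlobGarbageMeter::BlobInOutFlow& flow = pair.second;
  if (flow.HasGarbage()) {
    // Record flow.GetGarbageCount() / flow.GetGarbageBytes() against the
    // blob file number in pair.first.
  }
}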

View File

@ -1,196 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "db/blob/blob_garbage_meter.h"
#include <string>
#include <vector>
#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/dbformat.h"
#include "test_util/testharness.h"
namespace ROCKSDB_NAMESPACE {
TEST(BlobGarbageMeterTest, MeasureGarbage) {
BlobGarbageMeter blob_garbage_meter;
struct BlobDescriptor {
std::string user_key;
uint64_t blob_file_number;
uint64_t offset;
uint64_t size;
CompressionType compression_type;
bool has_in_flow;
bool has_out_flow;
uint64_t GetExpectedBytes() const {
return size +
BlobLogRecord::CalculateAdjustmentForRecordHeader(user_key.size());
}
};
// Note: blob file 4 has the same inflow and outflow and hence no additional
// garbage. Blob file 5 has less outflow than inflow and thus it does have
// additional garbage. Blob file 6 is a newly written file (i.e. no inflow,
// only outflow) and is thus not tracked by the meter.
std::vector<BlobDescriptor> blobs{
{"key", 4, 1234, 555, kLZ4Compression, true, true},
{"other_key", 4, 6789, 101010, kLZ4Compression, true, true},
{"yet_another_key", 5, 22222, 3456, kLZ4Compression, true, true},
{"foo_key", 5, 77777, 8888, kLZ4Compression, true, true},
{"bar_key", 5, 999999, 1212, kLZ4Compression, true, false},
{"baz_key", 5, 1234567, 890, kLZ4Compression, true, false},
{"new_key", 6, 7777, 9999, kNoCompression, false, true}};
for (const auto& blob : blobs) {
constexpr SequenceNumber seq = 123;
const InternalKey key(blob.user_key, seq, kTypeBlobIndex);
const Slice key_slice = key.Encode();
std::string value;
BlobIndex::EncodeBlob(&value, blob.blob_file_number, blob.offset, blob.size,
blob.compression_type);
const Slice value_slice(value);
if (blob.has_in_flow) {
ASSERT_OK(blob_garbage_meter.ProcessInFlow(key_slice, value_slice));
}
if (blob.has_out_flow) {
ASSERT_OK(blob_garbage_meter.ProcessOutFlow(key_slice, value_slice));
}
}
const auto& flows = blob_garbage_meter.flows();
ASSERT_EQ(flows.size(), 2);
{
const auto it = flows.find(4);
ASSERT_NE(it, flows.end());
const auto& flow = it->second;
constexpr uint64_t expected_count = 2;
const uint64_t expected_bytes =
blobs[0].GetExpectedBytes() + blobs[1].GetExpectedBytes();
const auto& in = flow.GetInFlow();
ASSERT_EQ(in.GetCount(), expected_count);
ASSERT_EQ(in.GetBytes(), expected_bytes);
const auto& out = flow.GetOutFlow();
ASSERT_EQ(out.GetCount(), expected_count);
ASSERT_EQ(out.GetBytes(), expected_bytes);
ASSERT_TRUE(flow.IsValid());
ASSERT_FALSE(flow.HasGarbage());
}
{
const auto it = flows.find(5);
ASSERT_NE(it, flows.end());
const auto& flow = it->second;
const auto& in = flow.GetInFlow();
constexpr uint64_t expected_in_count = 4;
const uint64_t expected_in_bytes =
blobs[2].GetExpectedBytes() + blobs[3].GetExpectedBytes() +
blobs[4].GetExpectedBytes() + blobs[5].GetExpectedBytes();
ASSERT_EQ(in.GetCount(), expected_in_count);
ASSERT_EQ(in.GetBytes(), expected_in_bytes);
const auto& out = flow.GetOutFlow();
constexpr uint64_t expected_out_count = 2;
const uint64_t expected_out_bytes =
blobs[2].GetExpectedBytes() + blobs[3].GetExpectedBytes();
ASSERT_EQ(out.GetCount(), expected_out_count);
ASSERT_EQ(out.GetBytes(), expected_out_bytes);
ASSERT_TRUE(flow.IsValid());
ASSERT_TRUE(flow.HasGarbage());
ASSERT_EQ(flow.GetGarbageCount(), expected_in_count - expected_out_count);
ASSERT_EQ(flow.GetGarbageBytes(), expected_in_bytes - expected_out_bytes);
}
}
TEST(BlobGarbageMeterTest, PlainValue) {
constexpr char user_key[] = "user_key";
constexpr SequenceNumber seq = 123;
const InternalKey key(user_key, seq, kTypeValue);
const Slice key_slice = key.Encode();
constexpr char value[] = "value";
const Slice value_slice(value);
BlobGarbageMeter blob_garbage_meter;
ASSERT_OK(blob_garbage_meter.ProcessInFlow(key_slice, value_slice));
ASSERT_OK(blob_garbage_meter.ProcessOutFlow(key_slice, value_slice));
ASSERT_TRUE(blob_garbage_meter.flows().empty());
}
TEST(BlobGarbageMeterTest, CorruptInternalKey) {
constexpr char corrupt_key[] = "i_am_corrupt";
const Slice key_slice(corrupt_key);
constexpr char value[] = "value";
const Slice value_slice(value);
BlobGarbageMeter blob_garbage_meter;
ASSERT_NOK(blob_garbage_meter.ProcessInFlow(key_slice, value_slice));
ASSERT_NOK(blob_garbage_meter.ProcessOutFlow(key_slice, value_slice));
}
TEST(BlobGarbageMeterTest, CorruptBlobIndex) {
constexpr char user_key[] = "user_key";
constexpr SequenceNumber seq = 123;
const InternalKey key(user_key, seq, kTypeBlobIndex);
const Slice key_slice = key.Encode();
constexpr char value[] = "i_am_not_a_blob_index";
const Slice value_slice(value);
BlobGarbageMeter blob_garbage_meter;
ASSERT_NOK(blob_garbage_meter.ProcessInFlow(key_slice, value_slice));
ASSERT_NOK(blob_garbage_meter.ProcessOutFlow(key_slice, value_slice));
}
TEST(BlobGarbageMeterTest, InlinedTTLBlobIndex) {
constexpr char user_key[] = "user_key";
constexpr SequenceNumber seq = 123;
const InternalKey key(user_key, seq, kTypeBlobIndex);
const Slice key_slice = key.Encode();
constexpr uint64_t expiration = 1234567890;
constexpr char inlined_value[] = "inlined";
std::string value;
BlobIndex::EncodeInlinedTTL(&value, expiration, inlined_value);
const Slice value_slice(value);
BlobGarbageMeter blob_garbage_meter;
ASSERT_NOK(blob_garbage_meter.ProcessInFlow(key_slice, value_slice));
ASSERT_NOK(blob_garbage_meter.ProcessOutFlow(key_slice, value_slice));
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@ -3,13 +3,13 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <sstream>
#include <string>
#include "rocksdb/compression_type.h"
#include "rocksdb/options.h"
#include "util/coding.h"
#include "util/compression.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
@ -52,9 +52,6 @@ class BlobIndex {
BlobIndex() : type_(Type::kUnknown) {}
BlobIndex(const BlobIndex&) = default;
BlobIndex& operator=(const BlobIndex&) = default;
bool IsInlined() const { return type_ == Type::kInlinedTTL; }
bool HasTTL() const {
@ -86,19 +83,14 @@ class BlobIndex {
return size_;
}
CompressionType compression() const {
assert(!IsInlined());
return compression_;
}
Status DecodeFrom(Slice slice) {
static const std::string kErrorMessage = "Error while decoding blob index";
assert(slice.size() > 0);
type_ = static_cast<Type>(*slice.data());
if (type_ >= Type::kUnknown) {
return Status::Corruption(kErrorMessage,
"Unknown blob index type: " +
std::to_string(static_cast<char>(type_)));
return Status::Corruption(
kErrorMessage,
"Unknown blob index type: " + ToString(static_cast<char>(type_)));
}
slice = Slice(slice.data() + 1, slice.size() - 1);
if (HasTTL()) {
@ -126,8 +118,7 @@ class BlobIndex {
oss << "[inlined blob] value:" << value_.ToString(output_hex);
} else {
oss << "[blob ref] file:" << file_number_ << " offset:" << offset_
<< " size:" << size_
<< " compression: " << CompressionTypeToString(compression_);
<< " size:" << size_;
}
if (HasTTL()) {
@ -185,3 +176,4 @@ class BlobIndex {
};
} // namespace ROCKSDB_NAMESPACE
#endif // ROCKSDB_LITE
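For reference, a round-trip sketch using the encode/decode entry points exercised by the tests elsewhere in this diff; the literal values are arbitrary:

// Sketch only: encode a blob reference, then decode and inspect it.
std::string encoded;
BlobIndex::EncodeBlob(&encoded, /* file_number */ 4, /* offset */ 1234,
                      /* size */ 555, kLZ4Compression);
BlobIndex index;
const Status s = index.DecodeFrom(encoded);
if (s.ok() && !index.IsInlined() && !index.HasTTL()) {
  // index.file_number() == 4, index.offset() == 1234, index.size() == 555
}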

View File

@ -3,6 +3,7 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#ifndef ROCKSDB_LITE
#include "db/blob/blob_log_format.h"
@ -10,6 +11,7 @@
#include "util/crc32c.h"
namespace ROCKSDB_NAMESPACE {
namespace blob_db {
void BlobLogHeader::EncodeTo(std::string* dst) {
assert(dst != nullptr);
@ -142,4 +144,6 @@ Status BlobLogRecord::CheckBlobCRC() const {
return Status::OK();
}
} // namespace blob_db
} // namespace ROCKSDB_NAMESPACE
#endif // ROCKSDB_LITE

View File

@ -7,6 +7,9 @@
#pragma once
#ifndef ROCKSDB_LITE
#include <limits>
#include <memory>
#include <utility>
@ -16,9 +19,11 @@
#include "rocksdb/types.h"
namespace ROCKSDB_NAMESPACE {
namespace blob_db {
constexpr uint32_t kMagicNumber = 2395959; // 0x00248f37
constexpr uint32_t kVersion1 = 1;
constexpr uint64_t kNoExpiration = std::numeric_limits<uint64_t>::max();
using ExpirationRange = std::pair<uint64_t, uint64_t>;
@ -104,14 +109,6 @@ struct BlobLogRecord {
// header includes fields up to blob CRC
static constexpr size_t kHeaderSize = 32;
// Note that the offset field of BlobIndex actually points to the blob value
// as opposed to the start of the blob record. The following method can
// be used to calculate the adjustment needed to read the blob record header.
static constexpr uint64_t CalculateAdjustmentForRecordHeader(
uint64_t key_size) {
return key_size + kHeaderSize;
}
uint64_t key_size = 0;
uint64_t value_size = 0;
uint64_t expiration = 0;
@ -131,19 +128,6 @@ struct BlobLogRecord {
Status CheckBlobCRC() const;
};
// Checks whether a blob offset is potentially valid or not.
inline bool IsValidBlobOffset(uint64_t value_offset, uint64_t key_size,
uint64_t value_size, uint64_t file_size) {
if (value_offset <
BlobLogHeader::kSize + BlobLogRecord::kHeaderSize + key_size) {
return false;
}
if (value_offset + value_size + BlobLogFooter::kSize > file_size) {
return false;
}
return true;
}
} // namespace blob_db
} // namespace ROCKSDB_NAMESPACE
#endif // ROCKSDB_LITE
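A worked example of the adjustment above: the offset stored in a blob index points at the value itself, so the enclosing record starts `key_size + kHeaderSize` bytes earlier. The offsets below are hypothetical:

// With kHeaderSize == 32 and a 3-byte key ("key"):
const uint64_t value_offset = 1000;  // taken from a blob index
const uint64_t adjustment =
    BlobLogRecord::CalculateAdjustmentForRecordHeader(3);  // 3 + 32 == 35
const uint64_t record_offset = value_offset - adjustment;  // 965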

View File

@ -3,35 +3,31 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#ifndef ROCKSDB_LITE
#include "db/blob/blob_log_sequential_reader.h"
#include "db/blob/blob_log_reader.h"
#include <algorithm>
#include "file/random_access_file_reader.h"
#include "monitoring/statistics.h"
#include "util/stop_watch.h"
namespace ROCKSDB_NAMESPACE {
namespace blob_db {
BlobLogSequentialReader::BlobLogSequentialReader(
std::unique_ptr<RandomAccessFileReader>&& file_reader, SystemClock* clock,
Statistics* statistics)
Reader::Reader(std::unique_ptr<RandomAccessFileReader>&& file_reader, Env* env,
Statistics* statistics)
: file_(std::move(file_reader)),
clock_(clock),
env_(env),
statistics_(statistics),
buffer_(),
next_byte_(0) {}
BlobLogSequentialReader::~BlobLogSequentialReader() = default;
Status BlobLogSequentialReader::ReadSlice(uint64_t size, Slice* slice,
char* buf) {
assert(slice);
assert(file_);
StopWatch read_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
// TODO: rate limit `BlobLogSequentialReader` reads (it appears unused?)
Status s =
file_->Read(IOOptions(), next_byte_, static_cast<size_t>(size), slice,
buf, nullptr, Env::IO_TOTAL /* rate_limiter_priority */);
Status Reader::ReadSlice(uint64_t size, Slice* slice, char* buf) {
StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
Status s = file_->Read(IOOptions(), next_byte_, static_cast<size_t>(size),
slice, buf, nullptr);
next_byte_ += size;
if (!s.ok()) {
return s;
@ -43,13 +39,9 @@ Status BlobLogSequentialReader::ReadSlice(uint64_t size, Slice* slice,
return s;
}
Status BlobLogSequentialReader::ReadHeader(BlobLogHeader* header) {
assert(header);
Status Reader::ReadHeader(BlobLogHeader* header) {
assert(file_.get() != nullptr);
assert(next_byte_ == 0);
static_assert(BlobLogHeader::kSize <= sizeof(header_buf_),
"Buffer is smaller than BlobLogHeader::kSize");
Status s = ReadSlice(BlobLogHeader::kSize, &buffer_, header_buf_);
if (!s.ok()) {
return s;
@ -62,13 +54,8 @@ Status BlobLogSequentialReader::ReadHeader(BlobLogHeader* header) {
return header->DecodeFrom(buffer_);
}
Status BlobLogSequentialReader::ReadRecord(BlobLogRecord* record,
ReadLevel level,
uint64_t* blob_offset) {
assert(record);
static_assert(BlobLogRecord::kHeaderSize <= sizeof(header_buf_),
"Buffer is smaller than BlobLogRecord::kHeaderSize");
Status Reader::ReadRecord(BlobLogRecord* record, ReadLevel level,
uint64_t* blob_offset) {
Status s = ReadSlice(BlobLogRecord::kHeaderSize, &buffer_, header_buf_);
if (!s.ok()) {
return s;
@ -114,21 +101,6 @@ Status BlobLogSequentialReader::ReadRecord(BlobLogRecord* record,
return s;
}
Status BlobLogSequentialReader::ReadFooter(BlobLogFooter* footer) {
assert(footer);
static_assert(BlobLogFooter::kSize <= sizeof(header_buf_),
"Buffer is smaller than BlobLogFooter::kSize");
Status s = ReadSlice(BlobLogFooter::kSize, &buffer_, header_buf_);
if (!s.ok()) {
return s;
}
if (buffer_.size() != BlobLogFooter::kSize) {
return Status::Corruption("EOF reached before file footer");
}
return footer->DecodeFrom(buffer_);
}
} // namespace blob_db
} // namespace ROCKSDB_NAMESPACE
#endif // ROCKSDB_LITE

82 db/blob/blob_log_reader.h Normal file
View File

@ -0,0 +1,82 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#pragma once
#ifndef ROCKSDB_LITE
#include <memory>
#include <string>
#include "db/blob/blob_log_format.h"
#include "file/random_access_file_reader.h"
#include "rocksdb/env.h"
#include "rocksdb/slice.h"
#include "rocksdb/statistics.h"
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
class SequentialFileReader;
class Logger;
namespace blob_db {
/**
 * Reader is a general purpose log stream reader implementation. The actual
 * job of reading from the device is delegated to the RandomAccessFileReader
 * passed in at construction time.
 *
 * Please see Writer for details on the file and record layout.
 */
class Reader {
public:
enum ReadLevel {
kReadHeader,
kReadHeaderKey,
kReadHeaderKeyBlob,
};
// Create a reader that will return log records from "*file".
// "*file" must remain live while this Reader is in use.
Reader(std::unique_ptr<RandomAccessFileReader>&& file_reader, Env* env,
Statistics* statistics);
// No copying allowed
Reader(const Reader&) = delete;
Reader& operator=(const Reader&) = delete;
~Reader() = default;
Status ReadHeader(BlobLogHeader* header);
// Read the next record into *record. Returns OK on success, and a non-OK
// status if we hit the end of the input or encounter a corrupted record.
// The contents filled in *record are only valid until the next mutating
// operation on this reader.
// If blob_offset is non-null, return the offset of the blob through it.
Status ReadRecord(BlobLogRecord* record, ReadLevel level = kReadHeader,
uint64_t* blob_offset = nullptr);
void ResetNextByte() { next_byte_ = 0; }
uint64_t GetNextByte() const { return next_byte_; }
private:
Status ReadSlice(uint64_t size, Slice* slice, char* buf);
const std::unique_ptr<RandomAccessFileReader> file_;
Env* env_;
Statistics* statistics_;
Slice buffer_;
char header_buf_[BlobLogRecord::kHeaderSize];
// Which byte to read next; used to assert proper usage.
uint64_t next_byte_;
};
} // namespace blob_db
} // namespace ROCKSDB_NAMESPACE
#endif // ROCKSDB_LITE
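A minimal read-loop sketch for the Reader declared above; constructing `file_reader` is elided, and treating the first non-OK status as the end of input is an assumption of this sketch:

// Sketch only: read the file header, then iterate over the records.
blob_db::Reader reader(std::move(file_reader), env, statistics);
BlobLogHeader header;
Status s = reader.ReadHeader(&header);
while (s.ok()) {
  BlobLogRecord record;
  uint64_t blob_offset = 0;
  s = reader.ReadRecord(&record, blob_db::Reader::kReadHeaderKeyBlob,
                        &blob_offset);
  // record.key / record.value remain valid only until the next read.
}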

Some files were not shown because too many files have changed in this diff.