Compare commits

..

6 Commits
main ... 3.8.fb

Author SHA1 Message Date
sdong
7e4a06c699 One more include<functional> 2019-10-31 14:52:10 -07:00
sdong
853247543f Disable warning as error 2019-10-31 14:52:09 -07:00
sdong
a72f5f6f4f Add some include<functional> 2019-10-31 14:52:09 -07:00
sdong
9f22c0f9f4 [FB Internal] Point to the latest tool chain. 2019-10-31 14:52:09 -07:00
sdong
8f0ffa6afd [FB Only] use gcc-5 2017-07-17 21:13:44 -07:00
Igor Canadi
6c9f6a115f Fix signed/unsigned compile 2014-11-24 13:27:49 -05:00
2114 changed files with 103234 additions and 658282 deletions

10
.arcconfig Normal file
View File

@ -0,0 +1,10 @@
{
"project_id" : "rocksdb",
"conduit_uri" : "https://reviews.facebook.net/",
"copyright_holder" : "Facebook",
"load" : [
"linters"
],
"lint.engine" : "FacebookFbcodeLintEngine",
"lint.engine.single.linter" : "FbcodeCppLinter"
}

View File

@ -1,911 +0,0 @@
version: 2.1
orbs:
win: circleci/windows@2.4.0
aliases:
- &notify-on-main-failure
fail_only: true
only_for_branches: main
commands:
install-cmake-on-macos:
steps:
- run:
name: Install cmake on macos
command: |
HOMEBREW_NO_AUTO_UPDATE=1 brew install cmake
install-jdk8-on-macos:
steps:
- run:
name: Install JDK 8 on macos
command: |
brew install --cask adoptopenjdk/openjdk/adoptopenjdk8
increase-max-open-files-on-macos:
steps:
- run:
name: Increase max open files
command: |
sudo sysctl -w kern.maxfiles=1048576
sudo sysctl -w kern.maxfilesperproc=1048576
sudo launchctl limit maxfiles 1048576
pre-steps:
steps:
- checkout
- run:
name: Setup Environment Variables
command: |
echo "export GTEST_THROW_ON_FAILURE=0" >> $BASH_ENV
echo "export GTEST_OUTPUT=\"xml:/tmp/test-results/\"" >> $BASH_ENV
echo "export SKIP_FORMAT_BUCK_CHECKS=1" >> $BASH_ENV
echo "export GTEST_COLOR=1" >> $BASH_ENV
echo "export CTEST_OUTPUT_ON_FAILURE=1" >> $BASH_ENV
echo "export CTEST_TEST_TIMEOUT=300" >> $BASH_ENV
echo "export ZLIB_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zlib" >> $BASH_ENV
echo "export BZIP2_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/bzip2" >> $BASH_ENV
echo "export SNAPPY_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/snappy" >> $BASH_ENV
echo "export LZ4_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/lz4" >> $BASH_ENV
echo "export ZSTD_DOWNLOAD_BASE=https://rocksdb-deps.s3.us-west-2.amazonaws.com/pkgs/zstd" >> $BASH_ENV
pre-steps-macos:
steps:
- pre-steps
post-steps:
steps:
- store_test_results: # store test result if there's any
path: /tmp/test-results
- store_artifacts: # store LOG for debugging if there's any
path: LOG
- run: # on fail, compress Test Logs for diagnosing the issue
name: Compress Test Logs
command: tar -cvzf t.tar.gz t
when: on_fail
- store_artifacts: # on fail, store Test Logs for diagnosing the issue
path: t.tar.gz
destination: test_logs
when: on_fail
install-clang-10:
steps:
- run:
name: Install Clang 10
command: |
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-10 main" | sudo tee -a /etc/apt/sources.list
echo "deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-10 main" | sudo tee -a /etc/apt/sources.list
echo "APT::Acquire::Retries \"10\";" | sudo tee -a /etc/apt/apt.conf.d/80-retries # llvm.org unreliable
sudo apt-get update -y && sudo apt-get install -y clang-10
install-clang-13:
steps:
- run:
name: Install Clang 13
command: |
echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee -a /etc/apt/sources.list
echo "deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" | sudo tee -a /etc/apt/sources.list
echo "APT::Acquire::Retries \"10\";" | sudo tee -a /etc/apt/apt.conf.d/80-retries # llvm.org unreliable
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
sudo apt-get update -y && sudo apt-get install -y clang-13
install-gflags:
steps:
- run:
name: Install gflags
command: |
sudo apt-get update -y && sudo apt-get install -y libgflags-dev
install-benchmark:
steps:
- run:
name: Install ninja build
command: sudo apt-get update -y && sudo apt-get install -y ninja-build
- run:
name: Install benchmark
command: |
git clone --depth 1 --branch v1.6.1 https://github.com/google/benchmark.git ~/benchmark
cd ~/benchmark && mkdir build && cd build
cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_GTEST_TESTS=0
ninja && sudo ninja install
install-valgrind:
steps:
- run:
name: Install valgrind
command: sudo apt-get update -y && sudo apt-get install -y valgrind
upgrade-cmake:
steps:
- run:
name: Upgrade cmake
command: |
sudo apt remove --purge cmake
sudo snap install cmake --classic
install-gflags-on-macos:
steps:
- run:
name: Install gflags on macos
command: |
HOMEBREW_NO_AUTO_UPDATE=1 brew install gflags
install-gtest-parallel:
steps:
- run:
name: Install gtest-parallel
command: |
git clone --single-branch --branch master --depth 1 https://github.com/google/gtest-parallel.git ~/gtest-parallel
echo 'export PATH=$HOME/gtest-parallel:$PATH' >> $BASH_ENV
install-compression-libs:
steps:
- run:
name: Install compression libs
command: |
sudo apt-get update -y && sudo apt-get install -y libsnappy-dev zlib1g-dev libbz2-dev liblz4-dev libzstd-dev
install-libprotobuf-mutator:
steps:
- run:
name: Install libprotobuf-mutator libs
command: |
git clone --single-branch --branch master --depth 1 git@github.com:google/libprotobuf-mutator.git ~/libprotobuf-mutator
cd ~/libprotobuf-mutator && mkdir build && cd build
cmake .. -GNinja -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang++-13 -DCMAKE_BUILD_TYPE=Release -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON
ninja && sudo ninja install
- run:
name: Setup environment variables
command: |
echo "export PKG_CONFIG_PATH=/usr/local/OFF/:~/libprotobuf-mutator/build/external.protobuf/lib/pkgconfig/" >> $BASH_ENV
echo "export PROTOC_BIN=~/libprotobuf-mutator/build/external.protobuf/bin/protoc" >> $BASH_ENV
executors:
windows-2xlarge:
machine:
image: 'windows-server-2019-vs2019:stable'
resource_class: windows.2xlarge
shell: bash.exe
jobs:
build-macos:
macos:
xcode: 12.5.1
resource_class: large
environment:
ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc cause env_test hang, disable it for now
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- pre-steps-macos
- run: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=32 -j32 all
- post-steps
build-macos-cmake:
macos:
xcode: 12.5.1
resource_class: large
parameters:
run_even_tests:
description: run even or odd tests, used to split tests to 2 groups
type: boolean
default: true
steps:
- increase-max-open-files-on-macos
- install-cmake-on-macos
- install-gflags-on-macos
- pre-steps-macos
- run:
name: "cmake generate project file"
command: ulimit -S -n `ulimit -H -n` && mkdir build && cd build && cmake -DWITH_GFLAGS=1 ..
- run:
name: "Build tests"
command: cd build && make V=1 -j32
- when:
condition: << parameters.run_even_tests >>
steps:
- run:
name: "Run even tests"
command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 0,,2
- when:
condition:
not: << parameters.run_even_tests >>
steps:
- run:
name: "Run odd tests"
command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 1,,2
- post-steps
build-linux:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: make V=1 J=32 -j32 check
- post-steps
build-linux-encrypted_env-no_compression:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=32 -j32 check
- run: |
./sst_dump --help | egrep -q 'Supported compression types: kNoCompression$' # Verify no compiled in compression
- post-steps
build-linux-shared_lib-alt_namespace-status_checked:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- run: ASSERT_STATUS_CHECKED=1 TEST_UINT128_COMPAT=1 ROCKSDB_MODIFY_NPHASH=1 LIB_MODE=shared OPT="-DROCKSDB_NAMESPACE=alternative_rocksdb_ns" make V=1 -j32 check
- post-steps
build-linux-release:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- checkout # check out the code in the project directory
- run: make V=1 -j32 release
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- install-gflags
- run: make V=1 -j32 release
- run: ./db_stress --version # ensure with gflags
- post-steps
build-linux-release-rtti:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- checkout # check out the code in the project directory
- run: make clean
- run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- run: sudo apt-get update -y && sudo apt-get install -y libgflags-dev
- run: make clean
- run: USE_RTTI=1 DEBUG_LEVEL=0 make V=1 -j16 static_lib tools db_bench
- run: ./db_stress --version # ensure with gflags
build-linux-lite:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-gflags
- run: LITE=1 make V=1 J=8 -j8 check
- post-steps
build-linux-lite-release:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- checkout # check out the code in the project directory
- run: LITE=1 make V=1 -j8 release
- run: if ./db_stress --version; then false; else true; fi # ensure without gflags
- install-gflags
- run: LITE=1 make V=1 -j8 release
- run: ./db_stress --version # ensure with gflags
- post-steps
build-linux-clang-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- checkout # check out the code in the project directory
- run: sudo apt-get update -y && sudo apt-get install -y clang libgflags-dev libtbb-dev
- run: CC=clang CXX=clang++ USE_CLANG=1 PORTABLE=1 make V=1 -j16 all
- post-steps
build-linux-clang10-asan:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-clang-10
- run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-clang10-mini-tsan:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
# find test list by `make list_all_tests`
parameters:
start_test:
default: ""
type: string
end_test:
default: ""
type: string
steps:
- pre-steps
- install-gflags
- install-clang-10
- install-gtest-parallel
- run:
name: "Build unit tests"
command: |
echo "env: $(env)"
ROCKSDBTESTS_START=<<parameters.start_test>> ROCKSDBTESTS_END=<<parameters.end_test>> ROCKSDBTESTS_SUBSET_TESTS_TO_FILE=/tmp/test_list COMPILE_WITH_TSAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 --output-sync=target build_subset_tests
- run:
name: "Run unit tests in parallel"
command: |
sed -i 's/[[:space:]]*$//; s/ / \.\//g; s/.*/.\/&/' /tmp/test_list
cat /tmp/test_list
gtest-parallel $(</tmp/test_list) --output_dir=/tmp | cat # pipe to cat to continuously output status on circleci UI. Otherwise, no status will be printed while the job is running.
- post-steps
build-linux-clang10-ubsan:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-clang-10
- run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check # aligned new doesn't work for reason we haven't figured out
- post-steps
build-linux-valgrind:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-valgrind
- run: PORTABLE=1 make V=1 -j32 valgrind_test
- post-steps
build-linux-clang10-clang-analyze:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-clang-10
- run: sudo apt-get update -y && sudo apt-get install -y clang-tools-10
- run: CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 CLANG_ANALYZER="/usr/bin/clang++-10" CLANG_SCAN_BUILD=scan-build-10 USE_CLANG=1 make V=1 -j32 analyze # aligned new doesn't work for reason we haven't figured out. For unknown, reason passing "clang++-10" as CLANG_ANALYZER doesn't work, and we need a full path.
- post-steps
build-linux-cmake-with-folly:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- upgrade-cmake
- run: make checkout_folly
- run: (mkdir build && cd build && cmake -DUSE_FOLLY=1 -DWITH_GFLAGS=1 .. && make V=1 -j20 && ctest -j20)
- post-steps
build-linux-cmake-with-benchmark:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-benchmark
- run: (mkdir build && cd build && cmake -DWITH_GFLAGS=1 -DWITH_BENCHMARK=1 .. && make V=1 -j20 && ctest -j20)
- post-steps
build-linux-unity-and-headers:
docker: # executor type
- image: gcc:latest
environment:
EXTRA_CXXFLAGS: -mno-avx512f # Warnings-as-error in avx512fintrin.h, would be used on newer hardware
resource_class: large
steps:
- checkout # check out the code in the project directory
- run: apt-get update -y && apt-get install -y libgflags-dev
- run: make V=1 -j8 unity_test
- run: make V=1 -j8 -k check-headers # could be moved to a different build
- post-steps
build-linux-gcc-7-with-folly:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-7 g++-7 libgflags-dev
- run: make checkout_folly
- run: USE_FOLLY=1 CC=gcc-7 CXX=g++-7 V=1 make -j32 check
- post-steps
build-linux-gcc-8-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-8 g++-8 libgflags-dev
- run: CC=gcc-8 CXX=g++-8 V=1 make -j16 all
- post-steps
build-linux-gcc-10-cxx20-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- pre-steps
- run: sudo apt-get update -y && sudo apt-get install gcc-10 g++-10 libgflags-dev
- run: CC=gcc-10 CXX=g++-10 V=1 ROCKSDB_CXX_STANDARD=c++20 make -j16 all
- post-steps
build-linux-gcc-11-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- pre-steps
- run: sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test && sudo apt-get update -y && sudo apt-get install gcc-11 g++-11 libgflags-dev
- install-benchmark
- run: CC=gcc-11 CXX=g++-11 V=1 make -j16 all microbench
- post-steps
build-linux-clang-13-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: xlarge
steps:
- pre-steps
- install-clang-13
- install-benchmark
- run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j16 all microbench
- post-steps
# Ensure ASAN+UBSAN with folly, and full testsuite with clang 13
build-linux-clang-13-asan-ubsan-with-folly:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-clang-13
- install-gflags
- run: make checkout_folly
- run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j32 check
- post-steps
# This job is only to make sure the microbench tests are able to run, the benchmark result is not meaningful as the CI host is changing.
build-linux-run-microbench:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-benchmark
- run: DEBUG_LEVEL=0 make -j32 run_microbench
- post-steps
build-linux-mini-crashtest:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-gflags
- install-compression-libs
- run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS=--duration=960 blackbox_crash_test_with_atomic_flush
- post-steps
build-windows:
executor: windows-2xlarge
parameters:
extra_cmake_opt:
default: ""
type: string
vs_year:
default: "2019"
type: string
cmake_generator:
default: "Visual Studio 16 2019"
type: string
environment:
THIRDPARTY_HOME: C:/Users/circleci/thirdparty
CMAKE_HOME: C:/Users/circleci/thirdparty/cmake-3.16.4-win64-x64
CMAKE_BIN: C:/Users/circleci/thirdparty/cmake-3.16.4-win64-x64/bin/cmake.exe
SNAPPY_HOME: C:/Users/circleci/thirdparty/snappy-1.1.7
SNAPPY_INCLUDE: C:/Users/circleci/thirdparty/snappy-1.1.7;C:/Users/circleci/thirdparty/snappy-1.1.7/build
SNAPPY_LIB_DEBUG: C:/Users/circleci/thirdparty/snappy-1.1.7/build/Debug/snappy.lib
VS_YEAR: <<parameters.vs_year>>
CMAKE_GENERATOR: <<parameters.cmake_generator>>
steps:
- checkout
- run:
name: "Setup VS"
command: |
if [[ "${VS_YEAR}" == "2019" ]]; then
echo "VS2019 already present."
elif [[ "${VS_YEAR}" == "2017" ]]; then
echo "Installing VS2017..."
powershell .circleci/vs2017_install.ps1
elif [[ "${VS_YEAR}" == "2015" ]]; then
echo "Installing VS2015..."
powershell .circleci/vs2015_install.ps1
fi
- store_artifacts:
path: \Users\circleci\AppData\Local\Temp\vslogs.zip
- run:
name: "Install thirdparty dependencies"
command: |
mkdir ${THIRDPARTY_HOME}
cd ${THIRDPARTY_HOME}
echo "Installing CMake..."
curl --fail --silent --show-error --output cmake-3.16.4-win64-x64.zip --location https://github.com/Kitware/CMake/releases/download/v3.16.4/cmake-3.16.4-win64-x64.zip
unzip -q cmake-3.16.4-win64-x64.zip
echo "Building Snappy dependency..."
curl --fail --silent --show-error --output snappy-1.1.7.zip --location https://github.com/google/snappy/archive/1.1.7.zip
unzip -q snappy-1.1.7.zip
cd snappy-1.1.7
mkdir build
cd build
${CMAKE_BIN} -G "${CMAKE_GENERATOR}" ..
msbuild.exe Snappy.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64
- run:
name: "Build RocksDB"
command: |
mkdir build
cd build
${CMAKE_BIN} -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug -DOPTDBG=1 -DPORTABLE=1 -DSNAPPY=1 -DJNI=1 << parameters.extra_cmake_opt >> ..
cd ..
echo "Building with VS version: ${CMAKE_GENERATOR}"
msbuild.exe build/rocksdb.sln -maxCpuCount -property:Configuration=Debug -property:Platform=x64
- run:
name: "Test RocksDB"
shell: powershell.exe
command: |
build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16
build-linux-java:
machine:
image: ubuntu-2004:202111-02
resource_class: large
environment:
JAVA_HOME: /usr/lib/jvm/java-1.8.0-openjdk-amd64
steps:
- pre-steps
- install-gflags
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Test RocksDBJava"
command: make V=1 J=8 -j8 jtest
- post-steps
build-linux-java-static:
machine:
image: ubuntu-2004:202111-02
resource_class: large
environment:
JAVA_HOME: /usr/lib/jvm/java-1.8.0-openjdk-amd64
steps:
- pre-steps
- install-gflags
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava Static Library"
command: make V=1 J=8 -j8 rocksdbjavastatic
- post-steps
build-macos-java:
macos:
xcode: 12.5.1
resource_class: large
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
ROCKSDB_DISABLE_JEMALLOC: 1 # jemalloc causes java 8 crash
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Test RocksDBJava"
command: make V=1 J=16 -j16 jtest
- post-steps
build-macos-java-static:
macos:
xcode: 12.5.1
resource_class: large
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- install-cmake-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava x86 and ARM Static Libraries"
command: make V=1 J=16 -j16 rocksdbjavastaticosx
- post-steps
build-macos-java-static-universal:
macos:
xcode: 12.5.1
resource_class: large
environment:
JAVA_HOME: /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home
steps:
- increase-max-open-files-on-macos
- install-gflags-on-macos
- install-cmake-on-macos
- install-jdk8-on-macos
- pre-steps-macos
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build RocksDBJava Universal Binary Static Library"
command: make V=1 J=16 -j16 rocksdbjavastaticosx_ub
- post-steps
build-examples:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-gflags
- run:
name: "Build examples"
command: |
OPT=-DTRAVIS V=1 make -j4 static_lib && cd examples && make -j4
- post-steps
build-cmake-mingw:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-gflags
- run: sudo apt-get update -y && sudo apt-get install -y mingw-w64
- run: sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix
- run:
name: "Build cmake-mingw"
command: |
sudo apt-get install snapd && sudo snap install cmake --beta --classic
export PATH=/snap/bin:$PATH
sudo apt-get install -y openjdk-8-jdk
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export PATH=$JAVA_HOME/bin:$PATH
echo "JAVA_HOME=${JAVA_HOME}"
which java && java -version
which javac && javac -version
mkdir build && cd build && cmake -DJNI=1 -DWITH_GFLAGS=OFF .. -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++ -DCMAKE_SYSTEM_NAME=Windows && make -j4 rocksdb rocksdbjni
- post-steps
build-linux-non-shm:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
environment:
TEST_TMPDIR: /tmp/rocksdb_test_tmp
steps:
- pre-steps
- install-gflags
- run: make V=1 -j32 check
- post-steps
build-linux-arm-test-full:
machine:
image: ubuntu-2004:202111-02
resource_class: arm.large
steps:
- pre-steps
- install-gflags
- run: make V=1 J=4 -j4 check
- post-steps
build-linux-arm:
machine:
image: ubuntu-2004:202111-02
resource_class: arm.large
steps:
- pre-steps
- install-gflags
- run: ROCKSDBTESTS_PLATFORM_DEPENDENT=only make V=1 J=4 -j4 all_but_some_tests check_some
- post-steps
build-linux-arm-cmake-no_test_run:
machine:
image: ubuntu-2004:202111-02
resource_class: arm.large
environment:
JAVA_HOME: /usr/lib/jvm/java-8-openjdk-arm64
steps:
- pre-steps
- install-gflags
- run:
name: "Set Java Environment"
command: |
echo "JAVA_HOME=${JAVA_HOME}"
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- run:
name: "Build with cmake"
command: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=1 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 ..
make -j4
- run:
name: "Build Java with cmake"
command: |
rm -rf build
mkdir build
cd build
cmake -DJNI=1 -DCMAKE_BUILD_TYPE=Release -DWITH_GFLAGS=1 ..
make -j4 rocksdb rocksdbjni
- post-steps
build-format-compatible:
machine:
image: ubuntu-2004:202111-02
resource_class: 2xlarge
steps:
- pre-steps
- install-gflags
- install-compression-libs
- run:
name: "test"
command: |
export TEST_TMPDIR=/dev/shm/rocksdb
rm -rf /dev/shm/rocksdb
mkdir /dev/shm/rocksdb
tools/check_format_compatible.sh
- post-steps
build-fuzzers:
machine:
image: ubuntu-2004:202111-02
resource_class: large
steps:
- pre-steps
- install-clang-13
- run: sudo apt-get update -y && sudo apt-get install -y cmake ninja-build binutils liblzma-dev libz-dev pkg-config autoconf libtool
- install-libprotobuf-mutator
- run:
name: "Build rocksdb lib"
command: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j4 static_lib
- run:
name: "Build fuzzers"
command: cd fuzz && make sst_file_writer_fuzzer db_fuzzer db_map_fuzzer
- post-steps
workflows:
version: 2
jobs-linux-run-tests:
jobs:
- build-linux
- build-linux-cmake-with-folly
- build-linux-gcc-7-with-folly
- build-linux-cmake-with-benchmark
- build-linux-encrypted_env-no_compression
- build-linux-lite
jobs-linux-run-tests-san:
jobs:
- build-linux-clang10-asan
- build-linux-clang10-ubsan
- build-linux-clang10-mini-tsan:
start_test: ""
end_test: "env_test"
- build-linux-clang10-mini-tsan:
start_test: "env_test"
end_test: ""
- build-linux-shared_lib-alt_namespace-status_checked
jobs-linux-no-test-run:
jobs:
- build-linux-release
- build-linux-release-rtti
- build-linux-lite-release
- build-examples
- build-fuzzers
- build-linux-clang-no_test_run
- build-linux-clang-13-no_test_run
- build-linux-gcc-8-no_test_run
- build-linux-gcc-10-cxx20-no_test_run
- build-linux-gcc-11-no_test_run
- build-linux-arm-cmake-no_test_run
jobs-linux-other-checks:
jobs:
- build-linux-clang10-clang-analyze
- build-linux-unity-and-headers
- build-linux-mini-crashtest
jobs-windows:
jobs:
- build-windows:
name: "build-windows-vs2019"
- build-windows:
name: "build-windows-vs2019-cxx20"
extra_cmake_opt: -DCMAKE_CXX_STANDARD=20
- build-windows:
name: "build-windows-vs2017"
vs_year: "2017"
cmake_generator: "Visual Studio 15 Win64"
- build-cmake-mingw
jobs-java:
jobs:
- build-linux-java
- build-linux-java-static
- build-macos-java
- build-macos-java-static
- build-macos-java-static-universal
jobs-macos:
jobs:
- build-macos
- build-macos-cmake:
run_even_tests: true
- build-macos-cmake:
run_even_tests: false
jobs-linux-arm:
jobs:
- build-linux-arm
nightly:
triggers:
- schedule:
cron: "0 9 * * *"
filters:
branches:
only:
- main
jobs:
- build-format-compatible
- build-linux-arm-test-full
- build-linux-run-microbench
- build-linux-non-shm
- build-linux-clang-13-asan-ubsan-with-folly
- build-linux-valgrind

View File

@ -1,6 +0,0 @@
# Supress UBSAN warnings related to stl_tree.h, e.g.
# UndefinedBehaviorSanitizer: undefined-behavior /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43 in
# /usr/bin/../lib/gcc/x86_64-linux-gnu/5.4.0/../../../../include/c++/5.4.0/bits/stl_tree.h:1505:43:
# runtime error: upcast of address 0x000001fa8820 with insufficient space for an object of type
# 'std::_Rb_tree_node<std::pair<const std::__cxx11::basic_string<char>, rocksdb::(anonymous namespace)::LockHoldingInfo> >'
src:*bits/stl_tree.h

View File

@ -1,24 +0,0 @@
$VS_DOWNLOAD_LINK = "https://go.microsoft.com/fwlink/?LinkId=691126"
$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe"
curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS 2015 installer failed"
exit 1
}
$VS_INSTALL_ARGS = @("/Quiet", "/NoRestart")
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
Remove-Item -Path vs_installer.exe -Force
$exitCode = $process.ExitCode
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
echo "VS 2015 installer exited with code $exitCode, which should be one of [0, 3010]."
curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS Collect tool failed."
exit 1
}
Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru
New-Item -Path "C:\w\build-results" -ItemType "directory" -Force
Copy-Item -Path "C:\Users\circleci\AppData\Local\Temp\vslogs.zip" -Destination "C:\w\build-results\"
exit 1
}
echo "VS 2015 installed."

View File

@ -1,35 +0,0 @@
$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe"
$COLLECT_DOWNLOAD_LINK = "https://aka.ms/vscollect.exe"
$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
"--add Microsoft.VisualStudio.Component.VC.Tools.14.13",
"--add Microsoft.Component.MSBuild",
"--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
"--add Microsoft.VisualStudio.Component.TextTemplating",
"--add Microsoft.VisualStudio.Component.VC.CoreIde",
"--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
"--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
"--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
"--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81")
curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS 2017 installer failed"
exit 1
}
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
Remove-Item -Path vs_installer.exe -Force
$exitCode = $process.ExitCode
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]."
curl.exe --retry 3 -kL $COLLECT_DOWNLOAD_LINK --output Collect.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS Collect tool failed."
exit 1
}
Start-Process "${PWD}\Collect.exe" -NoNewWindow -Wait -PassThru
New-Item -Path "C:\w\build-results" -ItemType "directory" -Force
Copy-Item -Path "C:\Users\circleci\AppData\Local\Temp\vslogs.zip" -Destination "C:\w\build-results\"
exit 1
}
echo "VS 2017 installed."

View File

@ -1,44 +0,0 @@
name: Check buck targets and code format
on: [push, pull_request]
jobs:
check:
name: Check TARGETS file and code format
runs-on: ubuntu-latest
steps:
- name: Checkout feature branch
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Fetch from upstream
run: |
git remote add upstream https://github.com/facebook/rocksdb.git && git fetch upstream
- name: Where am I
run: |
echo git status && git status
echo "git remote -v" && git remote -v
echo git branch && git branch
- name: Setup Python
uses: actions/setup-python@v1
- name: Install Dependencies
run: python -m pip install --upgrade pip
- name: Install argparse
run: pip install argparse
- name: Download clang-format-diff.py
uses: wei/wget@v1
with:
args: https://raw.githubusercontent.com/llvm/llvm-project/release/12.x/clang/tools/clang-format/clang-format-diff.py
- name: Check format
run: VERBOSE_CHECK=1 make check-format
- name: Compare buckify output
run: make check-buck-targets
- name: Simple source code checks
run: make check-sources

60
.gitignore vendored
View File

@ -1,5 +1,5 @@
make_config.mk
rocksdb.pc
TARGETS
build_config.mk
*.a
*.arc
@ -8,7 +8,6 @@ rocksdb.pc
*.gcda
*.gcno
*.o
*.o.tmp
*.so
*.so.*
*_test
@ -21,78 +20,27 @@ rocksdb.pc
*.d-e
*.o-*
*.swp
*~
*.vcxproj
*.vcxproj.filters
*.sln
*.cmake
.watchmanconfig
CMakeCache.txt
CMakeFiles/
build/
ldb
manifest_dump
sst_dump
blob_dump
block_cache_trace_analyzer
db_with_timestamp_basic_test
tools/block_cache_analyzer/*.pyc
column_aware_encoding_exp
util/build_version.cc
build_tools/VALGRIND_LOGS/
coverage/COVERAGE_REPORT
.gdbhistory
.gdb_history
package/
unity.a
.phutil_module_cache
tags
etags
rocksdb_dump
rocksdb_undump
db_test2
trace_analyzer
trace_analyzer_test
block_cache_trace_analyzer
io_tracer_parser
.DS_Store
.vs
.vscode
java/out
java/target
java/test-libs
java/*.log
java/include/org_rocksdb_*.h
.idea/
*.iml
rocksdb.cc
rocksdb.h
unity.cc
java/crossbuild/.vagrant
.vagrant/
java/**/*.asc
java/**.asc
java/javadoc
scan_build_report/
t
LOG
db_logs/
tp2/
fbcode/
fbcode
buckifier/*.pyc
buckifier/__pycache__
compile_commands.json
clang-format-diff.py
.py3/
fuzz/proto/gen/
fuzz/crash-*
cmake-build-*
third-party/folly/

View File

@ -1,4 +0,0 @@
extraction:
cpp:
index:
build_command: make static_lib

View File

@ -1,280 +1,19 @@
dist: xenial
language: cpp
os:
- linux
arch:
- arm64
- ppc64le
- s390x
compiler:
- clang
- gcc
cache:
- ccache
addons:
apt:
update: true
sources:
- ubuntu-toolchain-r-test
packages:
- libgflags-dev
- libbz2-dev
- liblz4-dev
- libsnappy-dev
- liblzma-dev # xv
- libzstd-dev
- zlib1g-dev
env:
- TEST_GROUP=platform_dependent # 16-18 minutes
- TEST_GROUP=1 # 33-35 minutes
- TEST_GROUP=2 # 18-20 minutes
- TEST_GROUP=3 # 20-22 minutes
- TEST_GROUP=4 # 12-14 minutes
# Run java tests
- JOB_NAME=java_test # 4-11 minutes
# Build ROCKSDB_LITE
- JOB_NAME=lite_build # 3-4 minutes
# Build examples
- JOB_NAME=examples # 5-7 minutes
- JOB_NAME=cmake # 3-5 minutes
- JOB_NAME=cmake-gcc8 # 3-5 minutes
- JOB_NAME=cmake-gcc9 # 3-5 minutes
- JOB_NAME=cmake-gcc9-c++20 # 3-5 minutes
- JOB_NAME=cmake-mingw # 3 minutes
matrix:
exclude:
- os : linux
arch: arm64
env: JOB_NAME=cmake-mingw
- os: linux
arch: ppc64le
env: JOB_NAME=cmake-mingw
- os: linux
arch: s390x
env: JOB_NAME=cmake-mingw
- os: linux
compiler: clang
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: arm64
env: TEST_GROUP=platform_dependent
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: TEST_GROUP=1
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: TEST_GROUP=1
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=1
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: TEST_GROUP=2
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: TEST_GROUP=2
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=2
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: TEST_GROUP=3
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: TEST_GROUP=3
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=3
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: TEST_GROUP=4
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: TEST_GROUP=4
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: TEST_GROUP=4
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: JOB_NAME=cmake
- if: type = pull_request AND commit_message !~ /FULL_CI/ AND commit_message !~ /java/
os : linux
arch: arm64
env: JOB_NAME=java_test
- if: type = pull_request AND commit_message !~ /FULL_CI/ AND commit_message !~ /java/
os: linux
arch: ppc64le
env: JOB_NAME=java_test
- if: type = pull_request AND commit_message !~ /FULL_CI/ AND commit_message !~ /java/
os: linux
arch: s390x
env: JOB_NAME=java_test
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: JOB_NAME=lite_build
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: JOB_NAME=lite_build
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=lite_build
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: JOB_NAME=examples
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: JOB_NAME=examples
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=examples
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: JOB_NAME=cmake-gcc8
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: JOB_NAME=cmake-gcc8
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=cmake-gcc8
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: JOB_NAME=cmake-gcc9
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: JOB_NAME=cmake-gcc9
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=cmake-gcc9
- if: type = pull_request AND commit_message !~ /FULL_CI/
os : linux
arch: arm64
env: JOB_NAME=cmake-gcc9-c++20
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: ppc64le
env: JOB_NAME=cmake-gcc9-c++20
- if: type = pull_request AND commit_message !~ /FULL_CI/
os: linux
arch: s390x
env: JOB_NAME=cmake-gcc9-c++20
install:
- CC=gcc-7 && CXX=g++-7
- if [ "${JOB_NAME}" == cmake-gcc8 ]; then
sudo apt-get install -y g++-8 || exit $?;
CC=gcc-8 && CXX=g++-8;
fi
- if [ "${JOB_NAME}" == cmake-gcc9 ] || [ "${JOB_NAME}" == cmake-gcc9-c++20 ]; then
sudo apt-get install -y g++-9 || exit $?;
CC=gcc-9 && CXX=g++-9;
fi
- if [ "${JOB_NAME}" == cmake-mingw ]; then
sudo apt-get install -y mingw-w64 || exit $?;
fi
- if [ "${CXX}" == "g++-7" ]; then
sudo apt-get install -y g++-7 || exit $?;
fi
- |
if [[ "${JOB_NAME}" == cmake* ]]; then
sudo apt-get remove -y cmake cmake-data
export CMAKE_DEB="cmake-3.14.5-Linux-$(uname -m).deb"
export CMAKE_DEB_URL="https://rocksdb-deps.s3-us-west-2.amazonaws.com/cmake/${CMAKE_DEB}"
curl --silent --fail --show-error --location --output "${CMAKE_DEB}" "${CMAKE_DEB_URL}" || exit $?
sudo dpkg -i "${CMAKE_DEB}" || exit $?
which cmake && cmake --version
fi
- |
if [[ "${JOB_NAME}" == java_test || "${JOB_NAME}" == cmake* ]]; then
# Ensure JDK 8
sudo apt-get install -y openjdk-8-jdk || exit $?
export PATH=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)/bin:$PATH
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)
echo "JAVA_HOME=${JAVA_HOME}"
which java && java -version
which javac && javac -version
fi
before_script:
# Increase the maximum number of open file descriptors, since some tests use
# more FDs than the default limit.
- ulimit -n 8192
script:
- date; ${CXX} --version
- if [ `command -v ccache` ]; then ccache -C; fi
- export MK_PARALLEL=4;
if [[ "$TRAVIS_CPU_ARCH" == s390x ]]; then
export MK_PARALLEL=1;
fi
- case $TEST_GROUP in
platform_dependent)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=only make -j$MK_PARALLEL all_but_some_tests check_some
;;
1)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_END=backup_engine_test make -j$MK_PARALLEL check_some
;;
2)
OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" LIB_MODE=shared V=1 make -j$MK_PARALLEL tools && OPT="-DTRAVIS -DROCKSDB_NAMESPACE=alternative_rocksdb_ns" LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=backup_engine_test ROCKSDBTESTS_END=db_universal_compaction_test make -j$MK_PARALLEL check_some
;;
3)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=db_universal_compaction_test ROCKSDBTESTS_END=table_properties_collector_test make -j$MK_PARALLEL check_some
;;
4)
OPT=-DTRAVIS LIB_MODE=shared V=1 ROCKSDBTESTS_PLATFORM_DEPENDENT=exclude ROCKSDBTESTS_START=table_properties_collector_test make -j$MK_PARALLEL check_some
;;
esac
- case $JOB_NAME in
java_test)
OPT=-DTRAVIS LIB_MODE=shared V=1 make rocksdbjava jtest
;;
lite_build)
OPT='-DTRAVIS -DROCKSDB_LITE' LIB_MODE=shared V=1 make -j$MK_PARALLEL all
;;
examples)
OPT=-DTRAVIS LIB_MODE=shared V=1 make -j$MK_PARALLEL static_lib && cd examples && make -j$MK_PARALLEL
;;
cmake-mingw)
sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix;
mkdir build && cd build && cmake -DJNI=1 -DWITH_GFLAGS=OFF .. -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-g++ -DCMAKE_SYSTEM_NAME=Windows && make -j4 rocksdb rocksdbjni
;;
cmake*)
case $JOB_NAME in
*-c++20)
OPT=-DCMAKE_CXX_STANDARD=20
;;
esac
mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -DWITH_TESTS=0 -DWITH_GFLAGS=0 -DWITH_BENCHMARK_TOOLS=0 -DWITH_TOOLS=0 -DWITH_CORE_TOOLS=1 .. && make -j$MK_PARALLEL && cd .. && rm -rf build && mkdir build && cd build && cmake -DJNI=1 .. -DCMAKE_BUILD_TYPE=Release $OPT && make -j$MK_PARALLEL rocksdb rocksdbjni
;;
esac
compiler: gcc
before_install:
# As of this writing (10 May 2014) the Travis build environment is Ubuntu 12.04,
# which needs the following ugly dependency incantations to build RocksDB:
- sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
- sudo apt-get update -qq
- sudo apt-get install -y -qq gcc-4.8 g++-4.8 zlib1g-dev libbz2-dev libsnappy-dev libjemalloc-dev
- sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 50
- sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 50
- wget https://gflags.googlecode.com/files/libgflags0_2.0-1_amd64.deb
- sudo dpkg -i libgflags0_2.0-1_amd64.deb
- wget https://gflags.googlecode.com/files/libgflags-dev_2.0-1_amd64.deb
- sudo dpkg -i libgflags-dev_2.0-1_amd64.deb
# Lousy hack to disable use and testing of fallocate, which doesn't behave quite
# as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment.
script: OPT=-DTRAVIS make unity && make clean && OPT=-DTRAVIS make check -j8
notifications:
email:
- leveldb@fb.com
email: false

View File

@ -1,6 +0,0 @@
{
"content_hash_warming": true,
"content_hash_max_items": 333333,
"hint_num_files_per_dir": 8,
"fsevents_latency": 0.05
}

View File

@ -9,4 +9,3 @@ Sanjay Ghemawat <sanjay@google.com>
# Partial list of contributors:
Kevin Regan <kevin.d.regan@gmail.com>
Johan Bilien <jobi@litl.com>
Matthew Von-Maszewski <https://github.com/matthewvon> (Basho Technologies)

File diff suppressed because it is too large Load Diff

View File

@ -1,77 +0,0 @@
# Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <opensource-conduct@fb.com>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq

View File

@ -1,8 +1,5 @@
# Contributing to RocksDB
## Code of Conduct
The code of conduct is described in [`CODE_OF_CONDUCT.md`](CODE_OF_CONDUCT.md)
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You
@ -15,3 +12,8 @@ Complete your CLA here: <https://code.facebook.com/cla>
If you prefer to sign a paper copy, we can send you a PDF. Send us an
e-mail or create a new github issue to request the CLA in PDF format.
## License
By contributing to RocksDB, you agree that your contributions will be
licensed under the [BSD License](LICENSE).

339
COPYING
View File

@ -1,339 +0,0 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

View File

@ -1,24 +0,0 @@
# RocksDB default options change log (NO LONGER MAINTAINED)
## Unreleased
* delayed_write_rate takes the rate given by rate_limiter if not specified.
## 5.2
* Change the default of delayed slowdown value to 16MB/s and further increase the L0 stop condition to 36 files.
## 5.0 (11/17/2016)
* Options::allow_concurrent_memtable_write and Options::enable_write_thread_adaptive_yield are now true by default
* Options.level0_stop_writes_trigger default value changes from 24 to 32.
## 4.8.0 (5/2/2016)
* options.max_open_files changes from 5000 to -1. It improves performance, but users need to set file descriptor limit to be large enough and watch memory usage for index and bloom filters.
* options.base_background_compactions changes from max_background_compactions to 1. When users set higher max_background_compactions but the write throughput is not high, the writes are less spiky to disks.
* options.wal_recovery_mode changes from kTolerateCorruptedTailRecords to kPointInTimeRecovery. Avoid some false positive when file system or hardware reorder the writes for file data and metadata.
## 4.7.0 (4/8/2016)
* options.write_buffer_size changes from 4MB to 64MB.
* options.target_file_size_base changes from 2MB to 64MB.
* options.max_bytes_for_level_base changes from 10MB to 256MB.
* options.soft_pending_compaction_bytes_limit changes from 0 (disabled) to 64GB.
* options.hard_pending_compaction_bytes_limit changes from 0 (disabled) to 256GB.
* table_cache_numshardbits changes from 4 to 6.
* max_file_opening_threads changes from 1 to 16.

View File

@ -1,16 +0,0 @@
## RocksDB dump format
The version 1 RocksDB dump format is fairly simple:
1) The dump starts with the magic 8 byte identifier "ROCKDUMP"
2) The magic is followed by an 8 byte big-endian version which is 0x00000001.
3) Next are arbitrarily sized chunks of bytes prepended by 4 byte little endian number indicating how large each chunk is.
4) The first chunk is special and is a json string indicating some things about the creation of this dump. It contains the following keys:
* database-path: The path of the database this dump was created from.
* hostname: The hostname of the machine where the dump was created.
* creation-time: Unix seconds since epoc when this dump was created.
5) Following the info dump the slices paired into are key/value pairs.

1842
HISTORY.md

File diff suppressed because it is too large Load Diff

View File

@ -1,58 +1,41 @@
## Compilation
**Important**: If you plan to run RocksDB in production, don't compile using default
`make` or `make all`. That will compile RocksDB in debug mode, which is much slower
than release mode.
RocksDB's library should be able to compile without any dependency installed,
although we recommend installing some compression libraries (see below).
We do depend on newer gcc/clang with C++17 support (GCC >= 7, Clang >= 5).
We do depend on newer gcc with C++11 support.
There are few options when compiling RocksDB:
* [recommended] `make static_lib` will compile librocksdb.a, RocksDB static library. Compiles static library in release mode.
* [recommended] `make static_lib` will compile librocksdb.a, RocksDB static library.
* `make shared_lib` will compile librocksdb.so, RocksDB shared library. Compiles shared library in release mode.
* `make shared_lib` will compile librocksdb.so, RocksDB shared library.
* `make check` will compile and run all the unit tests. `make check` will compile RocksDB in debug mode.
* `make check` will compile and run all the unit tests
* `make all` will compile our static library, and all our tools and unit tests. Our tools
depend on gflags. You will need to have gflags installed to run `make all`. This will compile RocksDB in debug mode. Don't
use binaries compiled by `make all` in production.
depend on gflags. You will need to have gflags installed to run `make all`.
* if Intel SSE instruction set is supported, set USE_SSE=" -msse -msse4.2 " to make sure
SSE4.2 is used to speed up CRC32 when calculating data checksum.
* By default the binary we produce is optimized for the platform you're compiling on
(`-march=native` or the equivalent). SSE4.2 will thus be enabled automatically if your
CPU supports it. To print a warning if your CPU does not support SSE4.2, build with
`USE_SSE=1 make static_lib` or, if using CMake, `cmake -DFORCE_SSE42=ON`. If you want
to build a portable binary, add `PORTABLE=1` before your make commands, like this:
`PORTABLE=1 make static_lib`.
## Dependencies
* You can link RocksDB with following compression libraries:
- [zlib](http://www.zlib.net/) - a library for data compression.
- [bzip2](http://www.bzip.org/) - a library for data compression.
- [lz4](https://github.com/lz4/lz4) - a library for extremely fast data compression.
- [snappy](http://google.github.io/snappy/) - a library for fast
- [snappy](https://code.google.com/p/snappy/) - a library for fast
data compression.
- [zstandard](http://www.zstd.net) - Fast real-time compression
algorithm.
* All our tools depend on:
- [gflags](https://gflags.github.io/gflags/) - a library that handles
- [gflags](https://code.google.com/p/gflags/) - a library that handles
command line flags processing. You can compile rocksdb library even
if you don't have gflags installed.
* `make check` will also check code formatting, which requires [clang-format](https://clang.llvm.org/docs/ClangFormat.html)
* If you wish to build the RocksJava static target, then cmake is required for building Snappy.
* If you wish to run microbench (e.g, `make microbench`, `make ribbon_bench` or `cmake -DWITH_BENCHMARK=1`), Google benchmark >= 1.6.0 is needed.
## Supported platforms
* **Linux - Ubuntu**
* Upgrade your gcc to version at least 7 to get C++17 support.
* Upgrade your gcc to version at least 4.7 to get C++11 support.
* Install gflags. First, try: `sudo apt-get install libgflags-dev`
If this doesn't work and you're using Ubuntu, here's a nice tutorial:
(http://askubuntu.com/questions/312173/installing-gflags-12-04)
@ -60,153 +43,46 @@ to build a portable binary, add `PORTABLE=1` before your make commands, like thi
`sudo apt-get install libsnappy-dev`.
* Install zlib. Try: `sudo apt-get install zlib1g-dev`.
* Install bzip2: `sudo apt-get install libbz2-dev`.
* Install lz4: `sudo apt-get install liblz4-dev`.
* Install zstandard: `sudo apt-get install libzstd-dev`.
* **Linux - CentOS / RHEL**
* Upgrade your gcc to version at least 7 to get C++17 support
* **Linux - CentOS**
* Upgrade your gcc to version at least 4.7 to get C++11 support:
`yum install gcc47-c++`
* Install gflags:
git clone https://github.com/gflags/gflags.git
cd gflags
git checkout v2.0
wget https://gflags.googlecode.com/files/gflags-2.0-no-svn-files.tar.gz
tar -xzvf gflags-2.0-no-svn-files.tar.gz
cd gflags-2.0
./configure && make && sudo make install
**Notice**: Once installed, please add the include path for gflags to your `CPATH` environment variable and the
lib path to `LIBRARY_PATH`. If installed with default settings, the include path will be `/usr/local/include`
and the lib path will be `/usr/local/lib`.
* Install snappy:
sudo yum install snappy snappy-devel
wget https://snappy.googlecode.com/files/snappy-1.1.1.tar.gz
tar -xzvf snappy-1.1.1.tar.gz
cd snappy-1.1.1
./configure && make && sudo make install
* Install zlib:
sudo yum install zlib zlib-devel
sudo yum install zlib
sudo yum install zlib-devel
* Install bzip2:
sudo yum install bzip2 bzip2-devel
* Install lz4:
sudo yum install lz4-devel
* Install ASAN (optional for debugging):
sudo yum install libasan
* Install zstandard:
* With [EPEL](https://fedoraproject.org/wiki/EPEL):
sudo yum install libzstd-devel
* With CentOS 8:
sudo dnf install libzstd-devel
* From source:
wget https://github.com/facebook/zstd/archive/v1.1.3.tar.gz
mv v1.1.3.tar.gz zstd-1.1.3.tar.gz
tar zxvf zstd-1.1.3.tar.gz
cd zstd-1.1.3
make && sudo make install
sudo yum install bzip2
sudo yum install bzip2-devel
* **OS X**:
* Install latest C++ compiler that supports C++ 17:
* Install latest C++ compiler that supports C++ 11:
* Update XCode: run `xcode-select --install` (or install it from XCode App's settting).
* Install via [homebrew](http://brew.sh/).
* If you're first time developer in MacOS, you still need to run: `xcode-select --install` in your command line.
* run `brew tap homebrew/versions; brew install gcc7 --use-llvm` to install gcc 7 (or higher).
* run `brew install rocksdb`
* **FreeBSD** (11.01):
* You can either install RocksDB from the Ports system using `cd /usr/ports/databases/rocksdb && make install`, or you can follow the details below to install dependencies and compile from source code:
* Install the dependencies for RocksDB:
export BATCH=YES
cd /usr/ports/devel/gmake && make install
cd /usr/ports/devel/gflags && make install
cd /usr/ports/archivers/snappy && make install
cd /usr/ports/archivers/bzip2 && make install
cd /usr/ports/archivers/liblz4 && make install
cd /usr/ports/archivesrs/zstd && make install
cd /usr/ports/devel/git && make install
* Install the dependencies for RocksJava (optional):
export BATCH=yes
cd /usr/ports/java/openjdk7 && make install
* Build RocksDB from source:
cd ~
git clone https://github.com/facebook/rocksdb.git
cd rocksdb
gmake static_lib
* Build RocksJava from source (optional):
cd rocksdb
export JAVA_HOME=/usr/local/openjdk7
gmake rocksdbjava
* **OpenBSD** (6.3/-current):
* As RocksDB is not available in the ports yet you have to build it on your own:
* Install the dependencies for RocksDB:
pkg_add gmake gflags snappy bzip2 lz4 zstd git jdk bash findutils gnuwatch
* Build RocksDB from source:
cd ~
git clone https://github.com/facebook/rocksdb.git
cd rocksdb
gmake static_lib
* Build RocksJava from source (optional):
cd rocksdb
export JAVA_HOME=/usr/local/jdk-1.8.0
export PATH=$PATH:/usr/local/jdk-1.8.0/bin
gmake rocksdbjava
* run `brew tap homebrew/dupes; brew install gcc47 --use-llvm` to install gcc 4.7 (or higher).
* Install zlib, bzip2 and snappy libraries for compression.
* Install gflags. We have included a script
`build_tools/mac-install-gflags.sh`, which should automatically install it (execute this file instead of runing using "source" command).
If you installed gflags by other means (for example, `brew install gflags`),
please set `LIBRARY_PATH` and `CPATH` accordingly.
* Please note that some of the optimizations/features are disabled in OSX.
We did not run any production workloads on it.
* **iOS**:
* Run: `TARGET_OS=IOS make static_lib`. When building the project which uses rocksdb iOS library, make sure to define two important pre-processing macros: `ROCKSDB_LITE` and `IOS_CROSS_COMPILE`.
* **Windows** (Visual Studio 2017 to up):
* Read and follow the instructions at CMakeLists.txt
* Or install via [vcpkg](https://github.com/microsoft/vcpkg)
* run `vcpkg install rocksdb:x64-windows`
* **AIX 6.1**
* Install AIX Toolbox rpms with gcc
* Use these environment variables:
export PORTABLE=1
export CC=gcc
export AR="ar -X64"
export EXTRA_ARFLAGS=-X64
export EXTRA_CFLAGS=-maix64
export EXTRA_CXXFLAGS=-maix64
export PLATFORM_LDFLAGS="-static-libstdc++ -static-libgcc"
export LIBPATH=/opt/freeware/lib
export JAVA_HOME=/usr/java8_64
export PATH=/opt/freeware/bin:$PATH
* **Solaris Sparc**
* Install GCC 7 and higher.
* Use these environment variables:
export CC=gcc
export EXTRA_CFLAGS=-m64
export EXTRA_CXXFLAGS=-m64
export EXTRA_LDFLAGS=-m64
export PORTABLE=1
export PLATFORM_LDFLAGS="-static-libstdc++ -static-libgcc"

View File

@ -1,24 +0,0 @@
This is the list of all known third-party language bindings for RocksDB. If something is missing, please open a pull request to add it.
* Java - https://github.com/facebook/rocksdb/tree/main/java
* Python
* http://python-rocksdb.readthedocs.io/en/latest/
* http://pyrocksdb.readthedocs.org/en/latest/ (unmaintained)
* Perl - https://metacpan.org/pod/RocksDB
* Node.js - https://npmjs.org/package/rocksdb
* Go - https://github.com/tecbot/gorocksdb
* Ruby - http://rubygems.org/gems/rocksdb-ruby
* Haskell - https://hackage.haskell.org/package/rocksdb-haskell
* PHP - https://github.com/Photonios/rocksdb-php
* C#
* https://github.com/warrenfalk/rocksdb-sharp
* https://github.com/curiosity-ai/rocksdb-sharp
* Rust
* https://github.com/pingcap/rust-rocksdb (used in production fork of https://github.com/spacejam/rust-rocksdb)
* https://github.com/spacejam/rust-rocksdb
* https://github.com/bh1xuw/rust-rocks
* D programming language - https://github.com/b1naryth1ef/rocksdb
* Erlang - https://gitlab.com/barrel-db/erlang-rocksdb
* Elixir - https://github.com/urbint/rox
* Nim - https://github.com/status-im/nim-rocksdb
* Swift and Objective-C (iOS/OSX) - https://github.com/iabudiab/ObjectiveRocks

View File

@ -1,4 +1,10 @@
This contains code that is from LevelDB, and that code is under the following license:
BSD License
For rocksdb software
Copyright (c) 2014, Facebook, Inc.
All rights reserved.
---------------------------------------------------------------------
Copyright (c) 2011 The LevelDB Authors. All rights reserved.

View File

@ -1,202 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

2622
Makefile

File diff suppressed because it is too large Load Diff

23
PATENTS Normal file
View File

@ -0,0 +1,23 @@
Additional Grant of Patent Rights
“Software” means the rocksdb software distributed by Facebook, Inc.
Facebook hereby grants you a perpetual, worldwide, royalty-free,
non-exclusive, irrevocable (subject to the termination provision below)
license under any rights in any patent claims owned by Facebook, to make,
have made, use, sell, offer to sell, import, and otherwise transfer the
Software. For avoidance of doubt, no license is granted under Facebook’s
rights in any patent claims that are infringed by (i) modifications to the
Software made by you or a third party, or (ii) the Software in combination
with any software or other technology provided by you or a third party.
The license granted hereunder will terminate, automatically and without
notice, for anyone that makes any claim (including by filing any lawsuit,
assertion or other action) alleging (a) direct, indirect, or contributory
infringement or inducement to infringe any patent: (i) by Facebook or any
of its subsidiaries or affiliates, whether or not such claim is related
to the Software, (ii) by any party if such claim arises in whole or in
part from any software, product or service of Facebook or any of its
subsidiaries or affiliates, whether or not such claim is related to the
Software, or (iii) by any party relating to the Software; or (b) that
any right in any patent claim of Facebook is invalid or unenforceable.

View File

@ -1,7 +0,0 @@
This is the list of all known third-party plugins for RocksDB. If something is missing, please open a pull request to add it.
* [Dedupfs](https://github.com/ajkr/dedupfs): an example for plugin developers to reference
* [HDFS](https://github.com/riversand963/rocksdb-hdfs-env): an Env used for interacting with HDFS. Migrated from main RocksDB repo
* [ZenFS](https://github.com/westerndigitalcorporation/zenfs): a file system for zoned block devices
* [RADOS](https://github.com/riversand963/rocksdb-rados-env): an Env used for interacting with RADOS. Migrated from RocksDB main repo.
* [PMEM](https://github.com/pmem/pmem-rocksdb-plugin): a collection of plugins to enable Persistent Memory on RocksDB.

View File

@ -1,23 +1,20 @@
## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage
[![CircleCI Status](https://circleci.com/gh/facebook/rocksdb.svg?style=svg)](https://circleci.com/gh/facebook/rocksdb)
[![TravisCI Status](https://api.travis-ci.com/facebook/rocksdb.svg?branch=main)](https://travis-ci.com/github/facebook/rocksdb)
[![Appveyor Build status](https://ci.appveyor.com/api/projects/status/fbgfu0so3afcno78/branch/main?svg=true)](https://ci.appveyor.com/project/Facebook/rocksdb/branch/main)
[![PPC64le Build Status](http://140-211-168-68-openstack.osuosl.org:8080/buildStatus/icon?job=rocksdb&style=plastic)](http://140-211-168-68-openstack.osuosl.org:8080/job/rocksdb)
[![Build Status](https://travis-ci.org/facebook/rocksdb.svg?branch=master)](https://travis-ci.org/facebook/rocksdb)
RocksDB is developed and maintained by Facebook Database Engineering Team.
It is built on earlier work on [LevelDB](https://github.com/google/leveldb) by Sanjay Ghemawat (sanjay@google.com)
It is built on earlier work on LevelDB by Sanjay Ghemawat (sanjay@google.com)
and Jeff Dean (jeff@google.com)
This code is a library that forms the core building block for a fast
key-value server, especially suited for storing data on flash drives.
key value server, especially suited for storing data on flash drives.
It has a Log-Structured-Merge-Database (LSM) design with flexible tradeoffs
between Write-Amplification-Factor (WAF), Read-Amplification-Factor (RAF)
and Space-Amplification-Factor (SAF). It has multi-threaded compactions,
making it especially suitable for storing multiple terabytes of data in a
making it specially suitable for storing multiple terabytes of data in a
single database.
Start with example usage here: https://github.com/facebook/rocksdb/tree/main/examples
Start with example usage here: https://github.com/facebook/rocksdb/tree/master/examples
See the [github wiki](https://github.com/facebook/rocksdb/wiki) for more explanation.
@ -25,8 +22,4 @@ The public interface is in `include/`. Callers should not include or
rely on the details of any other header files in this package. Those
internal APIs may be changed without warning.
Questions and discussions are welcome on the [RocksDB Developers Public](https://www.facebook.com/groups/rocksdb.dev/) Facebook group and [email list](https://groups.google.com/g/rocksdb) on Google Groups.
## License
RocksDB is dual-licensed under both the GPLv2 (found in the COPYING file in the root directory) and Apache 2.0 License (found in the LICENSE.Apache file in the root directory). You may select, at your option, one of the above-listed licenses.
Design discussions are conducted in https://www.facebook.com/groups/rocksdb.dev/

View File

@ -4,11 +4,10 @@ RocksDBLite is a project focused on mobile use cases, which don't need a lot of
Some examples of the features disabled by ROCKSDB_LITE:
* compiled-in support for LDB tool
* No backup engine
* No support for replication (which we provide in form of TransactionalIterator)
* No backupable DB
* No support for replication (which we provide in form of TrasactionalIterator)
* No advanced monitoring tools
* No special-purpose memtables that are highly optimized for specific use cases
* No Transactions
When adding a new big feature to RocksDB, please add ROCKSDB_LITE compile guard if:
* Nobody from mobile really needs your feature,

5848
TARGETS

File diff suppressed because it is too large Load Diff

128
USERS.md
View File

@ -1,128 +0,0 @@
This document lists users of RocksDB and their use cases. If you are using RocksDB, please open a pull request and add yourself to the list.
## Facebook
At Facebook, we use RocksDB as storage engines in multiple data management services and a backend for many different stateful services, including:
1. MyRocks -- https://github.com/MySQLOnRocksDB/mysql-5.6
2. MongoRocks -- https://github.com/mongodb-partners/mongo-rocks
3. ZippyDB -- Facebook's distributed key-value store with Paxos-style replication, built on top of RocksDB.[1] https://www.youtube.com/watch?v=DfiN7pG0D0khtt
4. Laser -- Laser is a high query throughput, low (millisecond) latency, key-value storage service built on top of RocksDB.[1]
4. Dragon -- a distributed graph query engine. https://code.facebook.com/posts/1737605303120405/dragon-a-distributed-graph-query-engine/
5. Stylus -- a low-level stream processing framework writtenin C++.[1]
6. LogDevice -- a distributed data store for logs [2]
[1] https://research.facebook.com/publications/realtime-data-processing-at-facebook/
[2] https://code.facebook.com/posts/357056558062811/logdevice-a-distributed-data-store-for-logs/
## LinkedIn
Two different use cases at Linkedin are using RocksDB as a storage engine:
1. LinkedIn's follow feed for storing user's activities. Check out the blog post: https://engineering.linkedin.com/blog/2016/03/followfeed--linkedin-s-feed-made-faster-and-smarter
2. Apache Samza, open source framework for stream processing
Learn more about those use cases in a Tech Talk by Ankit Gupta and Naveen Somasundaram: http://www.youtube.com/watch?v=plqVp_OnSzg
## Yahoo
Yahoo is using RocksDB as a storage engine for their biggest distributed data store Sherpa. Learn more about it here: http://yahooeng.tumblr.com/post/120730204806/sherpa-scales-new-heights
## Baidu
[Apache Doris](http://doris.apache.org/master/en/) is a MPP analytical database engine released by Baidu. It [uses RocksDB](http://doris.apache.org/master/en/administrator-guide/operation/tablet-meta-tool.html) to manage its tablet's metadata.
## CockroachDB
CockroachDB is an open-source geo-replicated transactional database. They are using RocksDB as their storage engine. Check out their github: https://github.com/cockroachdb/cockroach
## DNANexus
DNANexus is using RocksDB to speed up processing of genomics data.
You can learn more from this great blog post by Mike Lin: http://devblog.dnanexus.com/faster-bam-sorting-with-samtools-and-rocksdb/
## Iron.io
Iron.io is using RocksDB as a storage engine for their distributed queueing system.
Learn more from Tech Talk by Reed Allman: http://www.youtube.com/watch?v=HTjt6oj-RL4
## Tango Me
Tango is using RocksDB as a graph storage to store all users' connection data and other social activity data.
## Turn
Turn is using RocksDB as a storage layer for their key/value store, serving at peak 2.4MM QPS out of different datacenters.
Check out our RocksDB Protobuf merge operator at: https://github.com/vladb38/rocksdb_protobuf
## Santander UK/Cloudera Profession Services
Check out their blog post: http://blog.cloudera.com/blog/2015/08/inside-santanders-near-real-time-data-ingest-architecture/
## Airbnb
Airbnb is using RocksDB as a storage engine for their personalized search service. You can learn more about it here: https://www.youtube.com/watch?v=ASQ6XMtogMs
## Alluxio
[Alluxio](https://www.alluxio.io) uses RocksDB to serve and scale file system metadata to beyond 1 Billion files. The detailed design and implementation is described in this engineering blog:
https://www.alluxio.io/blog/scalable-metadata-service-in-alluxio-storing-billions-of-files/
## Pinterest
Pinterest's Object Retrieval System uses RocksDB for storage: https://www.youtube.com/watch?v=MtFEVEs_2Vo
## Smyte
[Smyte](https://www.smyte.com/) uses RocksDB as the storage layer for their core key-value storage, high-performance counters and time-windowed HyperLogLog services.
## Rakuten Marketing
[Rakuten Marketing](https://marketing.rakuten.com/) uses RocksDB as the disk cache layer for the real-time bidding service in their Performance DSP.
## VWO, Wingify
[VWO's](https://vwo.com/) Smart Code checker and URL helper uses RocksDB to store all the URLs where VWO's Smart Code is installed.
## quasardb
[quasardb](https://www.quasardb.net) is a high-performance, distributed, transactional key-value database that integrates well with in-memory analytics engines such as Apache Spark.
quasardb uses a heavily tuned RocksDB as its persistence layer.
## Netflix
[Netflix](http://techblog.netflix.com/2016/05/application-data-caching-using-ssds.html) Netflix uses RocksDB on AWS EC2 instances with local SSD drives to cache application data.
## TiKV
[TiKV](https://github.com/pingcap/tikv) is a GEO-replicated, high-performance, distributed, transactional key-value database. TiKV is powered by Rust and Raft. TiKV uses RocksDB as its persistence layer.
## Apache Flink
[Apache Flink](https://flink.apache.org/news/2016/03/08/release-1.0.0.html) uses RocksDB to store state locally on a machine.
## Dgraph
[Dgraph](https://github.com/dgraph-io/dgraph) is an open-source, scalable, distributed, low latency, high throughput Graph database .They use RocksDB to store state locally on a machine.
## Uber
[Uber](http://eng.uber.com/cherami/) uses RocksDB as a durable and scalable task queue.
## 360 Pika
[360](http://www.360.cn/) [Pika](https://github.com/Qihoo360/pika) is a nosql compatible with redis. With the huge amount of data stored, redis may suffer for a capacity bottleneck, and pika was born for solving it. It has widely been used in many companies.
## LzLabs
LzLabs is using RocksDB as a storage engine in their multi-database distributed framework to store application configuration and user data.
## ProfaneDB
[ProfaneDB](https://profanedb.gitlab.io/) is a database for Protocol Buffers, and uses RocksDB for storage. It is accessible via gRPC, and the schema is defined using directly `.proto` files.
## IOTA Foundation
[IOTA Foundation](https://www.iota.org/) is using RocksDB in the [IOTA Reference Implementation (IRI)](https://github.com/iotaledger/iri) to store the local state of the Tangle. The Tangle is the first open-source distributed ledger powering the future of the Internet of Things.
## Avrio Project
[Avrio Project](http://avrio-project.github.io/avrio.network/) is using RocksDB in [Avrio ](https://github.com/avrio-project/avrio) to store blocks, account balances and data and other blockchain-releated data. Avrio is a multiblockchain decentralized cryptocurrency empowering monetary transactions.
## Crux
[Crux](https://github.com/juxt/crux) is a document database that uses RocksDB for local [EAV](https://en.wikipedia.org/wiki/Entity%E2%80%93attribute%E2%80%93value_model) index storage to enable point-in-time bitemporal Datalog queries. The "unbundled" architecture uses Kafka to provide horizontal scalability.
## Nebula Graph
[Nebula Graph](https://github.com/vesoft-inc/nebula) is a distributed, scalable, lightning-fast, open source graph database capable of hosting super large scale graphs with dozens of billions of vertices (nodes) and trillions of edges, with milliseconds of latency.
## YugabyteDB
[YugabyteDB](https://www.yugabyte.com/) is an open source, high performance, distributed SQL database that uses RocksDB as its storage layer. For more information, please see https://github.com/yugabyte/yugabyte-db/.
## ArangoDB
[ArangoDB](https://www.arangodb.com/) is a native multi-model database with flexible data models for documents, graphs, and key-values, for building high performance applications using a convenient SQL-like query language or JavaScript extensions. It uses RocksDB as its storage engine.
## Milvus
[Milvus](https://milvus.io/) is an open source vector database for unstructured data. It uses RocksDB not only as one of the supported kv storage engines, but also as a message queue.
## Kafka
[Kafka](https://kafka.apache.org/) is an open-source distributed event streaming platform, it uses RocksDB to store state in Kafka Streams: https://www.confluent.io/blog/how-to-tune-rocksdb-kafka-streams-state-stores-performance/.
## Solana Labs
[Solana](https://github.com/solana-labs/solana) is a fast, secure, scalable, and decentralized blockchain. It uses RocksDB as the underlying storage for its ledger store.
## Others
More databases using RocksDB can be found at [dbdb.io](https://dbdb.io/browse?embeds=rocksdb).

23
Vagrantfile vendored
View File

@ -1,4 +1,3 @@
# Vagrant file
Vagrant.configure("2") do |config|
config.vm.provider "virtualbox" do |v|
@ -14,26 +13,4 @@ Vagrant.configure("2") do |config|
box.vm.box = "chef/centos-6.5"
end
config.vm.define "centos7" do |box|
box.vm.box = "centos/7"
box.vm.provision "shell", path: "build_tools/setup_centos7.sh"
end
config.vm.define "FreeBSD10" do |box|
box.vm.guest = :freebsd
box.vm.box = "robin/freebsd-10"
# FreeBSD does not support 'mount_virtualbox_shared_folder', use NFS
box.vm.synced_folder ".", "/vagrant", :nfs => true, id: "vagrant-root"
box.vm.network "private_network", ip: "10.0.1.10"
# build everything after creating VM, skip using --no-provision
box.vm.provision "shell", inline: <<-SCRIPT
pkg install -y gmake clang35
export CXX=/usr/local/bin/clang++35
cd /vagrant
gmake clean
gmake all OPT=-g
SCRIPT
end
end

View File

@ -1,228 +0,0 @@
# Microsoft Contribution Notes
## Contributors
* Alexander Zinoviev https://github.com/zinoale
* Dmitri Smirnov https://github.com/yuslepukhin
* Praveen Rao https://github.com/PraveenSinghRao
* Sherlock Huang https://github.com/SherlockNoMad
## Introduction
RocksDB is a well proven open source key-value persistent store, optimized for fast storage. It provides scalability with number of CPUs and storage IOPS, to support IO-bound, in-memory and write-once workloads, most importantly, to be flexible to allow for innovation.
As Microsoft Bing team we have been continuously pushing hard to improve the scalability, efficiency of platform and eventually benefit Bing end-user satisfaction. We would like to explore the opportunity to embrace open source, RocksDB here, to use, enhance and customize for our usage, and also contribute back to the RocksDB community. Herein, we are pleased to offer this RocksDB port for Windows platform.
These notes describe some decisions and changes we had to make with regards to porting RocksDB on Windows. We hope this will help both reviewers and users of the Windows port.
We are open for comments and improvements.
## OS specifics
All of the porting, testing and benchmarking was done on Windows Server 2012 R2 Datacenter 64-bit but to the best of our knowledge there is not a specific API we used during porting that is unsupported on other Windows OS after Vista.
## Porting goals
We strive to achieve the following goals:
* make use of the existing porting interface of RocksDB
* make minimum [WY2]modifications within platform independent code.
* make all unit test pass both in debug and release builds.
* Note: latest introduction of SyncPoint seems to disable running db_test in Release.
* make performance on par with published benchmarks accounting for HW differences
* we would like to keep the port code inline with the main branch with no forking
## Build system
We have chosen CMake as a widely accepted build system to build the Windows port. It is very fast and convenient.
At the same time it generates Visual Studio projects that are both usable from a command line and IDE.
The top-level CMakeLists.txt file contains description of all targets and build rules. It also provides brief instructions on how to build the software for Windows. One more build related file is thirdparty.inc that also resides on the top level. This file must be edited to point to actual third party libraries location.
We think that it would be beneficial to merge the existing make-based build system and the new cmake-based build system into a single one to use on all platforms.
All building and testing was done for 64-bit. We have not conducted any testing for 32-bit and early reports indicate that it will not run on 32-bit.
## C++ and STL notes
We had to make some minimum changes within the portable files that either account for OS differences or the shortcomings of C++11 support in the current version of the MS compiler. Most or all of them are expected to be fixed in the upcoming compiler releases.
We plan to use this port for our business purposes here at Bing and this provided business justification for this port. This also means, we do not have at present to choose the compiler version at will.
* Certain headers that are not present and not necessary on Windows were simply `#ifndef OS_WIN` in a few places (`unistd.h`)
* All posix specific headers were replaced to port/port.h which worked well
* Replaced `dirent.h` for `port/port_dirent.h` (very few places) with the implementation of the relevant interfaces within `rocksdb::port` namespace
* Replaced `sys/time.h` to `port/sys_time.h` (few places) implemented equivalents within `rocksdb::port`
* `printf %z` specification is not supported on Windows. To imitate existing standards we came up with a string macro `ROCKSDB_PRIszt` which expands to `zu` on posix systems and to `Iu` on windows.
* in class member initialization were moved to a __ctors in some cases
* `constexpr` is not supported. We had to replace `std::numeric_limits<>::max/min()` to its C macros for constants. Sometimes we had to make class members `static const` and place a definition within a .cc file.
* `constexpr` for functions was replaced to a template specialization (1 place)
* Union members that have non-trivial constructors were replaced to `char[]` in one place along with bug fixes (spatial experimental feature)
* Zero-sized arrays are deemed a non-standard extension which we converted to 1 size array and that should work well for the purposes of these classes.
* `std::chrono` lacks nanoseconds support (fixed in the upcoming release of the STL) and we had to use `QueryPerfCounter()` within env_win.cc
* Function local statics initialization is still not safe. Used `std::once` to mitigate within WinEnv.
## Windows Environments notes
We endeavored to make it functionally on par with posix_env. This means we replicated the functionality of the thread pool and other things as precise as possible, including:
* Replicate posix logic using std:thread primitives.
* Implement all posix_env disk access functionality.
* Set `use_os_buffer=false` to disable OS disk buffering for WinWritableFile and WinRandomAccessFile.
* Replace `pread/pwrite` with `WriteFile/ReadFile` with `OVERLAPPED` structure.
* Use `SetFileInformationByHandle` to compensate absence of `fallocate`.
### In detail
Even though Windows provides its own efficient thread-pool implementation we chose to replicate posix logic using `std::thread` primitives. This allows anyone to quickly detect any changes within the posix source code and replicate them within windows env. This has proven to work very well. At the same time for anyone who wishes to replace the built-in thread-pool can do so using RocksDB stackable environments.
For disk access we implemented all of the functionality present within the posix_env which includes memory mapped files, random access, rate-limiter support etc.
The `use_os_buffer` flag on Posix platforms currently denotes disabling read-ahead log via `fadvise` mechanism. Windows does not have `fadvise` system call. What is more, it implements disk cache in a way that differs from Linux greatly. It's not an uncommon practice on Windows to perform un-buffered disk access to gain control of the memory consumption. We think that in our use case this may also be a good configuration option at the expense of disk throughput. To compensate one may increase the configured in-memory cache size instead. Thus we have chosen `use_os_buffer=false` to disable OS disk buffering for `WinWritableFile` and `WinRandomAccessFile`. The OS imposes restrictions on the alignment of the disk offsets, buffers used and the amount of data that is read/written when accessing files in un-buffered mode. When the option is true, the classes behave in a standard way. This allows to perform writes and reads in cases when un-buffered access does not make sense such as WAL and MANIFEST.
We have replaced `pread/pwrite` with `WriteFile/ReadFile` with `OVERLAPPED` structure so we can atomically seek to the position of the disk operation but still perform the operation synchronously. Thus we able to emulate that functionality of `pread/pwrite` reasonably well. The only difference is that the file pointer is not returned to its original position but that hardly matters given the random nature of access.
We used `SetFileInformationByHandle` both to truncate files after writing a full final page to disk and to pre-allocate disk space for faster I/O thus compensating for the absence of `fallocate` although some differences remain. For example, the pre-allocated space is not filled with zeros like on Linux, however, on a positive note, the end of file position is also not modified after pre-allocation.
RocksDB renames, copies and deletes files at will even though they may be opened with another handle at the same time. We had to relax and allow nearly all the concurrent access permissions possible.
## Thread-Local Storage
Thread-Local storage plays a significant role for RocksDB performance. Rather than creating a separate implementation we chose to create inline wrappers that forward `pthread_specific` calls to Windows `Tls` interfaces within `rocksdb::port` namespace. This leaves the existing meat of the logic in tact and unchanged and just as maintainable.
To mitigate the lack of thread local storage cleanup on thread-exit we added a limited amount of windows specific code within the same thread_local.cc file that injects a cleanup callback into a `"__tls"` structure within `".CRT$XLB"` data segment. This approach guarantees that the callback is invoked regardless of whether RocksDB used within an executable, standalone DLL or within another DLL.
## Jemalloc usage
When RocksDB is used with Jemalloc the latter needs to be initialized before any of the C++ globals or statics. To accomplish that we injected an initialization routine into `".CRT$XCT"` that is automatically invoked by the runtime before initializing static objects. je-uninit is queued to `atexit()`.
The jemalloc redirecting `new/delete` global operators are used by the linker providing certain conditions are met. See build section in these notes.
## Stack Trace and Unhandled Exception Handler
We decided not to implement these two features because the hosting program as a rule has these two things in it.
We experienced no inconveniences debugging issues in the debugger or analyzing process dumps if need be and thus we did not
see this as a priority.
## Performance results
### Setup
All of the benchmarks are run on the same set of machines. Here are the details of the test setup:
* 2 Intel(R) Xeon(R) E5 2450 0 @ 2.10 GHz (total 16 cores)
* 2 XK0480GDQPH SSD Device, total 894GB free disk
* Machine has 128 GB of RAM
* Operating System: Windows Server 2012 R2 Datacenter
* 100 Million keys; each key is of size 10 bytes, each value is of size 800 bytes
* total database size is ~76GB
* The performance result is based on RocksDB 3.11.
* The parameters used, unless specified, were exactly the same as published in the GitHub Wiki page.
### RocksDB on flash storage
#### Test 1. Bulk Load of keys in Random Order
Version 3.11
* Total Run Time: 17.6 min
* Fillrandom: 5.480 micros/op 182465 ops/sec; 142.0 MB/s
* Compact: 486056544.000 micros/op 0 ops/sec
Version 3.10
* Total Run Time: 16.2 min
* Fillrandom: 5.018 micros/op 199269 ops/sec; 155.1 MB/s
* Compact: 441313173.000 micros/op 0 ops/sec;
#### Test 2. Bulk Load of keys in Sequential Order
Version 3.11
* Fillseq: 4.944 micros/op 202k ops/sec; 157.4 MB/s
Version 3.10
* Fillseq: 4.105 micros/op 243.6k ops/sec; 189.6 MB/s
#### Test 3. Random Write
Version 3.11
* Unbuffered I/O enabled
* Overwrite: 52.661 micros/op 18.9k ops/sec; 14.8 MB/s
Version 3.10
* Unbuffered I/O enabled
* Overwrite: 52.661 micros/op 18.9k ops/sec;
#### Test 4. Random Read
Version 3.11
* Unbuffered I/O enabled
* Readrandom: 15.716 micros/op 63.6k ops/sec; 49.5 MB/s
Version 3.10
* Unbuffered I/O enabled
* Readrandom: 15.548 micros/op 64.3k ops/sec;
#### Test 5. Multi-threaded read and single-threaded write
Version 3.11
* Unbuffered I/O enabled
* Readwhilewriting: 25.128 micros/op 39.7k ops/sec;
Version 3.10
* Unbuffered I/O enabled
* Readwhilewriting: 24.854 micros/op 40.2k ops/sec;
### RocksDB In Memory
#### Test 1. Point Lookup
Version 3.11
80K writes/sec
* Write Rate Achieved: 40.5k write/sec;
* Readwhilewriting: 0.314 micros/op 3187455 ops/sec; 364.8 MB/s (715454999 of 715454999 found)
Version 3.10
* Write Rate Achieved: 50.6k write/sec
* Readwhilewriting: 0.316 micros/op 3162028 ops/sec; (719576999 of 719576999 found)
*10K writes/sec*
Version 3.11
* Write Rate Achieved: 5.8k/s write/sec
* Readwhilewriting: 0.246 micros/op 4062669 ops/sec; 464.9 MB/s (915481999 of 915481999 found)
Version 3.10
* Write Rate Achieved: 5.8k/s write/sec
* Readwhilewriting: 0.244 micros/op 4106253 ops/sec; (927986999 of 927986999 found)
#### Test 2. Prefix Range Query
Version 3.11
80K writes/sec
* Write Rate Achieved: 46.3k/s write/sec
* Readwhilewriting: 0.362 micros/op 2765052 ops/sec; 316.4 MB/s (611549999 of 611549999 found)
Version 3.10
* Write Rate Achieved: 45.8k/s write/sec
* Readwhilewriting: 0.317 micros/op 3154941 ops/sec; (708158999 of 708158999 found)
Version 3.11
10K writes/sec
* Write Rate Achieved: 5.78k write/sec
* Readwhilewriting: 0.269 micros/op 3716692 ops/sec; 425.3 MB/s (837401999 of 837401999 found)
Version 3.10
* Write Rate Achieved: 5.7k write/sec
* Readwhilewriting: 0.261 micros/op 3830152 ops/sec; (863482999 of 863482999 found)
We think that there is still big room to improve the performance, which will be an ongoing effort for us.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,310 +0,0 @@
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
try:
from builtins import str
except ImportError:
from __builtin__ import str
from targets_builder import TARGETSBuilder
import json
import os
import fnmatch
import sys
from util import ColorString
# This script generates TARGETS file for Buck.
# Buck is a build tool specifying dependencies among different build targets.
# User can pass extra dependencies as a JSON object via command line, and this
# script can include these dependencies in the generate TARGETS file.
# Usage:
# $python3 buckifier/buckify_rocksdb.py
# (This generates a TARGET file without user-specified dependency for unit
# tests.)
# $python3 buckifier/buckify_rocksdb.py \
# '{"fake": {
# "extra_deps": [":test_dep", "//fakes/module:mock1"],
# "extra_compiler_flags": ["-DROCKSDB_LITE", "-Os"]
# }
# }'
# (Generated TARGETS file has test_dep and mock1 as dependencies for RocksDB
# unit tests, and will use the extra_compiler_flags to compile the unit test
# source.)
# tests to export as libraries for inclusion in other projects
_EXPORTED_TEST_LIBS = ["env_basic_test"]
# Parse src.mk files as a Dictionary of
# VAR_NAME => list of files
def parse_src_mk(repo_path):
src_mk = repo_path + "/src.mk"
src_files = {}
for line in open(src_mk):
line = line.strip()
if len(line) == 0 or line[0] == '#':
continue
if '=' in line:
current_src = line.split('=')[0].strip()
src_files[current_src] = []
elif '.c' in line:
src_path = line.split('\\')[0].strip()
src_files[current_src].append(src_path)
return src_files
# get all .cc / .c files
def get_cc_files(repo_path):
cc_files = []
for root, dirnames, filenames in os.walk(repo_path): # noqa: B007 T25377293 Grandfathered in
root = root[(len(repo_path) + 1):]
if "java" in root:
# Skip java
continue
for filename in fnmatch.filter(filenames, '*.cc'):
cc_files.append(os.path.join(root, filename))
for filename in fnmatch.filter(filenames, '*.c'):
cc_files.append(os.path.join(root, filename))
return cc_files
# Get non_parallel tests from Makefile
def get_non_parallel_tests(repo_path):
Makefile = repo_path + "/Makefile"
s = set({})
found_non_parallel_tests = False
for line in open(Makefile):
line = line.strip()
if line.startswith("NON_PARALLEL_TEST ="):
found_non_parallel_tests = True
elif found_non_parallel_tests:
if line.endswith("\\"):
# remove the trailing \
line = line[:-1]
line = line.strip()
s.add(line)
else:
# we consumed all the non_parallel tests
break
return s
# Parse extra dependencies passed by user from command line
def get_dependencies():
deps_map = {
'': {
'extra_deps': [],
'extra_compiler_flags': []
}
}
if len(sys.argv) < 2:
return deps_map
def encode_dict(data):
rv = {}
for k, v in data.items():
if isinstance(v, dict):
v = encode_dict(v)
rv[k] = v
return rv
extra_deps = json.loads(sys.argv[1], object_hook=encode_dict)
for target_alias, deps in extra_deps.items():
deps_map[target_alias] = deps
return deps_map
# Prepare TARGETS file for buck
def generate_targets(repo_path, deps_map):
print(ColorString.info("Generating TARGETS"))
# parsed src.mk file
src_mk = parse_src_mk(repo_path)
# get all .cc files
cc_files = get_cc_files(repo_path)
# get non_parallel tests from Makefile
non_parallel_tests = get_non_parallel_tests(repo_path)
if src_mk is None or cc_files is None or non_parallel_tests is None:
return False
extra_argv = ""
if len(sys.argv) >= 2:
# Heuristically quote and canonicalize whitespace for inclusion
# in how the file was generated.
extra_argv = " '{0}'".format(" ".join(sys.argv[1].split()))
TARGETS = TARGETSBuilder("%s/TARGETS" % repo_path, extra_argv)
# rocksdb_lib
TARGETS.add_library(
"rocksdb_lib",
src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
deps=["//folly/container:f14_hash"])
# rocksdb_whole_archive_lib
TARGETS.add_library(
"rocksdb_whole_archive_lib",
src_mk["LIB_SOURCES"] +
# always add range_tree, it's only excluded on ppc64, which we don't use internally
src_mk["RANGE_TREE_SOURCES"] +
src_mk["TOOL_LIB_SOURCES"],
deps=["//folly/container:f14_hash"],
headers=None,
extra_external_deps="",
link_whole=True)
# rocksdb_test_lib
TARGETS.add_library(
"rocksdb_test_lib",
src_mk.get("MOCK_LIB_SOURCES", []) +
src_mk.get("TEST_LIB_SOURCES", []) +
src_mk.get("EXP_LIB_SOURCES", []) +
src_mk.get("ANALYZER_LIB_SOURCES", []),
[":rocksdb_lib"],
extra_test_libs=True
)
# rocksdb_tools_lib
TARGETS.add_library(
"rocksdb_tools_lib",
src_mk.get("BENCH_LIB_SOURCES", []) +
src_mk.get("ANALYZER_LIB_SOURCES", []) +
["test_util/testutil.cc"],
[":rocksdb_lib"])
# rocksdb_cache_bench_tools_lib
TARGETS.add_library(
"rocksdb_cache_bench_tools_lib",
src_mk.get("CACHE_BENCH_LIB_SOURCES", []),
[":rocksdb_lib"])
# rocksdb_stress_lib
TARGETS.add_rocksdb_library(
"rocksdb_stress_lib",
src_mk.get("ANALYZER_LIB_SOURCES", [])
+ src_mk.get('STRESS_LIB_SOURCES', [])
+ ["test_util/testutil.cc"])
# db_stress binary
TARGETS.add_binary("db_stress",
["db_stress_tool/db_stress.cc"],
[":rocksdb_stress_lib"])
# bench binaries
for src in src_mk.get("MICROBENCH_SOURCES", []):
name = src.rsplit('/',1)[1].split('.')[0] if '/' in src else src.split('.')[0]
TARGETS.add_binary(
name,
[src],
[],
extra_bench_libs=True
)
print("Extra dependencies:\n{0}".format(json.dumps(deps_map)))
# Dictionary test executable name -> relative source file path
test_source_map = {}
# c_test.c is added through TARGETS.add_c_test(). If there
# are more than one .c test file, we need to extend
# TARGETS.add_c_test() to include other C tests too.
for test_src in src_mk.get("TEST_MAIN_SOURCES_C", []):
if test_src != 'db/c_test.c':
print("Don't know how to deal with " + test_src)
return False
TARGETS.add_c_test()
try:
with open(f"{repo_path}/buckifier/bench.json") as json_file:
fast_fancy_bench_config_list = json.load(json_file)
for config_dict in fast_fancy_bench_config_list:
clean_benchmarks = {}
benchmarks = config_dict['benchmarks']
for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items():
clean_benchmarks[binary][benchmark] = []
for metric in overloaded_metric_list:
if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric)
TARGETS.add_fancy_bench_config(config_dict['name'], clean_benchmarks, False, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold'])
with open(f"{repo_path}/buckifier/bench-slow.json") as json_file:
slow_fancy_bench_config_list = json.load(json_file)
for config_dict in slow_fancy_bench_config_list:
clean_benchmarks = {}
benchmarks = config_dict['benchmarks']
for binary, benchmark_dict in benchmarks.items():
clean_benchmarks[binary] = {}
for benchmark, overloaded_metric_list in benchmark_dict.items():
clean_benchmarks[binary][benchmark] = []
for metric in overloaded_metric_list:
if not isinstance(metric, dict):
clean_benchmarks[binary][benchmark].append(metric)
for config_dict in slow_fancy_bench_config_list:
TARGETS.add_fancy_bench_config(config_dict['name']+"_slow", clean_benchmarks, True, config_dict['expected_runtime_one_iter'], config_dict['sl_iterations'], config_dict['regression_threshold'])
# it is better servicelab experiments break
# than rocksdb github ci
except Exception:
pass
TARGETS.add_test_header()
for test_src in src_mk.get("TEST_MAIN_SOURCES", []):
test = test_src.split('.c')[0].strip().split('/')[-1].strip()
test_source_map[test] = test_src
print("" + test + " " + test_src)
for target_alias, deps in deps_map.items():
for test, test_src in sorted(test_source_map.items()):
if len(test) == 0:
print(ColorString.warning("Failed to get test name for %s" % test_src))
continue
test_target_name = \
test if not target_alias else test + "_" + target_alias
if test in _EXPORTED_TEST_LIBS:
test_library = "%s_lib" % test_target_name
TARGETS.add_library(test_library, [test_src], deps=[":rocksdb_test_lib"], extra_test_libs=True)
TARGETS.register_test(
test_target_name,
test_src,
deps = json.dumps(deps['extra_deps'] + [':'+test_library]),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags']))
else:
TARGETS.register_test(
test_target_name,
test_src,
deps = json.dumps(deps['extra_deps'] + [":rocksdb_test_lib"] ),
extra_compiler_flags = json.dumps(deps['extra_compiler_flags']))
print(ColorString.info("Generated TARGETS Summary:"))
print(ColorString.info("- %d libs" % TARGETS.total_lib))
print(ColorString.info("- %d binarys" % TARGETS.total_bin))
print(ColorString.info("- %d tests" % TARGETS.total_test))
return True
def get_rocksdb_path():
# rocksdb = {script_dir}/..
script_dir = os.path.dirname(sys.argv[0])
script_dir = os.path.abspath(script_dir)
rocksdb_path = os.path.abspath(
os.path.join(script_dir, "../"))
return rocksdb_path
def exit_with_error(msg):
print(ColorString.error(msg))
sys.exit(1)
def main():
deps_map = get_dependencies()
# Generate TARGETS file for buck
ok = generate_targets(get_rocksdb_path(), deps_map)
if not ok:
exit_with_error("Failed to generate TARGETS files")
if __name__ == "__main__":
main()

View File

@ -1,32 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# If clang_format_diff.py command is not specfied, we assume we are able to
# access directly without any path.
TGT_DIFF=`git diff TARGETS | head -n 1`
if [ ! -z "$TGT_DIFF" ]
then
echo "TARGETS file has uncommitted changes. Skip this check."
exit 0
fi
echo Backup original TARGETS file.
cp TARGETS TARGETS.bkp
${PYTHON:-python3} buckifier/buckify_rocksdb.py
TGT_DIFF=`git diff TARGETS | head -n 1`
if [ -z "$TGT_DIFF" ]
then
mv TARGETS.bkp TARGETS
exit 0
else
echo "Please run '${PYTHON:-python3} buckifier/buckify_rocksdb.py' to update TARGETS file."
echo "Do not manually update TARGETS file."
${PYTHON:-python3} --version
mv TARGETS.bkp TARGETS
exit 1
fi

View File

@ -1,6 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Create a tmp directory for the test to use
TEST_DIR=$(mktemp -d /dev/shm/fbcode_rocksdb_XXXXXXX)
# shellcheck disable=SC2068
TEST_TMPDIR="$TEST_DIR" $@ && rm -rf "$TEST_DIR"

View File

@ -1,113 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
try:
from builtins import object
from builtins import str
except ImportError:
from __builtin__ import object
from __builtin__ import str
import targets_cfg
import pprint
def pretty_list(lst, indent=8):
if lst is None or len(lst) == 0:
return ""
if len(lst) == 1:
return "\"%s\"" % lst[0]
separator = "\",\n%s\"" % (" " * indent)
res = separator.join(sorted(lst))
res = "\n" + (" " * indent) + "\"" + res + "\",\n" + (" " * (indent - 4))
return res
class TARGETSBuilder(object):
def __init__(self, path, extra_argv):
self.path = path
self.targets_file = open(path, 'wb')
header = targets_cfg.rocksdb_target_header_template.format(
extra_argv=extra_argv)
self.targets_file.write(header.encode("utf-8"))
self.total_lib = 0
self.total_bin = 0
self.total_test = 0
self.tests_cfg = ""
def __del__(self):
self.targets_file.close()
def add_library(self, name, srcs, deps=None, headers=None,
extra_external_deps="", link_whole=False,
external_dependencies=None, extra_test_libs=False):
if headers is not None:
headers = "[" + pretty_list(headers) + "]"
self.targets_file.write(targets_cfg.library_template.format(
name=name,
srcs=pretty_list(srcs),
headers=headers,
deps=pretty_list(deps),
extra_external_deps=extra_external_deps,
link_whole=link_whole,
external_dependencies=pretty_list(external_dependencies),
extra_test_libs=extra_test_libs
).encode("utf-8"))
self.total_lib = self.total_lib + 1
def add_rocksdb_library(self, name, srcs, headers=None,
external_dependencies=None):
if headers is not None:
headers = "[" + pretty_list(headers) + "]"
self.targets_file.write(targets_cfg.rocksdb_library_template.format(
name=name,
srcs=pretty_list(srcs),
headers=headers,
external_dependencies=pretty_list(external_dependencies)
).encode("utf-8")
)
self.total_lib = self.total_lib + 1
def add_binary(self, name, srcs, deps=None, extra_preprocessor_flags=None,extra_bench_libs=False):
self.targets_file.write(targets_cfg.binary_template.format(
name=name,
srcs=pretty_list(srcs),
deps=pretty_list(deps),
extra_preprocessor_flags=pretty_list(extra_preprocessor_flags),
extra_bench_libs=extra_bench_libs,
).encode("utf-8"))
self.total_bin = self.total_bin + 1
def add_c_test(self):
self.targets_file.write(b"""
add_c_test_wrapper()
""")
def add_test_header(self):
self.targets_file.write(b"""
# Generate a test rule for each entry in ROCKS_TESTS
# Do not build the tests in opt mode, since SyncPoint and other test code
# will not be included.
""")
def add_fancy_bench_config(self, name, bench_config, slow, expected_runtime, sl_iterations, regression_threshold):
self.targets_file.write(targets_cfg.fancy_bench_template.format(
name=name,
bench_config=pprint.pformat(bench_config),
slow=slow,
expected_runtime=expected_runtime,
sl_iterations=sl_iterations,
regression_threshold=regression_threshold
).encode("utf-8"))
def register_test(self,
test_name,
src,
deps,
extra_compiler_flags):
self.targets_file.write(targets_cfg.unittests_template.format(test_name=test_name,test_cc=str(src),deps=deps,
extra_compiler_flags=extra_compiler_flags).encode("utf-8"))
self.total_test = self.total_test + 1

View File

@ -1,46 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
rocksdb_target_header_template = \
"""# This file \100generated by:
#$ python3 buckifier/buckify_rocksdb.py{extra_argv}
# --> DO NOT EDIT MANUALLY <--
# This file is a Facebook-specific integration for buck builds, so can
# only be validated by Facebook employees.
#
# @noautodeps @nocodemods
load("//rocks/buckifier:defs.bzl", "cpp_library_wrapper","rocks_cpp_library_wrapper","cpp_binary_wrapper","cpp_unittest_wrapper","fancy_bench_wrapper","add_c_test_wrapper")
"""
library_template = """
cpp_library_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], headers={headers}, link_whole={link_whole}, extra_test_libs={extra_test_libs})
"""
rocksdb_library_template = """
rocks_cpp_library_wrapper(name="{name}", srcs=[{srcs}], headers={headers})
"""
binary_template = """
cpp_binary_wrapper(name="{name}", srcs=[{srcs}], deps=[{deps}], extra_preprocessor_flags=[{extra_preprocessor_flags}], extra_bench_libs={extra_bench_libs})
"""
unittests_template = """
cpp_unittest_wrapper(name="{test_name}",
srcs=["{test_cc}"],
deps={deps},
extra_compiler_flags={extra_compiler_flags})
"""
fancy_bench_template = """
fancy_bench_wrapper(suite_name="{name}", binary_to_bench_to_metric_list_map={bench_config}, slow={slow}, expected_runtime={expected_runtime}, sl_iterations={sl_iterations}, regression_threshold={regression_threshold})
"""

View File

@ -1,119 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""
This module keeps commonly used components.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
try:
from builtins import object
except ImportError:
from __builtin__ import object
import subprocess
import sys
import os
import time
class ColorString(object):
""" Generate colorful strings on terminal """
HEADER = '\033[95m'
BLUE = '\033[94m'
GREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
@staticmethod
def _make_color_str(text, color):
# In Python2, default encoding for unicode string is ASCII
if sys.version_info.major <= 2:
return "".join(
[color, text.encode('utf-8'), ColorString.ENDC])
# From Python3, default encoding for unicode string is UTF-8
return "".join(
[color, text, ColorString.ENDC])
@staticmethod
def ok(text):
if ColorString.is_disabled:
return text
return ColorString._make_color_str(text, ColorString.GREEN)
@staticmethod
def info(text):
if ColorString.is_disabled:
return text
return ColorString._make_color_str(text, ColorString.BLUE)
@staticmethod
def header(text):
if ColorString.is_disabled:
return text
return ColorString._make_color_str(text, ColorString.HEADER)
@staticmethod
def error(text):
if ColorString.is_disabled:
return text
return ColorString._make_color_str(text, ColorString.FAIL)
@staticmethod
def warning(text):
if ColorString.is_disabled:
return text
return ColorString._make_color_str(text, ColorString.WARNING)
is_disabled = False
def run_shell_command(shell_cmd, cmd_dir=None):
""" Run a single shell command.
@returns a tuple of shell command return code, stdout, stderr """
if cmd_dir is not None and not os.path.exists(cmd_dir):
run_shell_command("mkdir -p %s" % cmd_dir)
start = time.time()
print("\t>>> Running: " + shell_cmd)
p = subprocess.Popen(shell_cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=cmd_dir)
stdout, stderr = p.communicate()
end = time.time()
# Report time if we spent more than 5 minutes executing a command
execution_time = end - start
if execution_time > (60 * 5):
mins = (execution_time / 60)
secs = (execution_time % 60)
print("\t>time spent: %d minutes %d seconds" % (mins, secs))
return p.returncode, stdout, stderr
def run_shell_commands(shell_cmds, cmd_dir=None, verbose=False):
""" Execute a sequence of shell commands, which is equivalent to
running `cmd1 && cmd2 && cmd3`
@returns boolean indication if all commands succeeds.
"""
if cmd_dir:
print("\t=== Set current working directory => %s" % cmd_dir)
for shell_cmd in shell_cmds:
ret_code, stdout, stderr = run_shell_command(shell_cmd, cmd_dir)
if stdout:
if verbose or ret_code != 0:
print(ColorString.info("stdout: \n"), stdout)
if stderr:
# contents in stderr is not necessarily to be error messages.
if verbose or ret_code != 0:
print(ColorString.error("stderr: \n"), stderr)
if ret_code != 0:
return False
return True

View File

@ -1,111 +0,0 @@
#!/usr/bin/python
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# amalgamate.py creates an amalgamation from a unity build.
# It can be run with either Python 2 or 3.
# An amalgamation consists of a header that includes the contents of all public
# headers and a source file that includes the contents of all source files and
# private headers.
#
# This script works by starting with the unity build file and recursively expanding
# #include directives. If the #include is found in a public include directory,
# that header is expanded into the amalgamation header.
#
# A particular header is only expanded once, so this script will
# break if there are multiple inclusions of the same header that are expected to
# expand differently. Similarly, this type of code causes issues:
#
# #ifdef FOO
# #include "bar.h"
# // code here
# #else
# #include "bar.h" // oops, doesn't get expanded
# // different code here
# #endif
#
# The solution is to move the include out of the #ifdef.
from __future__ import print_function
import argparse
from os import path
import re
import sys
include_re = re.compile('^[ \t]*#include[ \t]+"(.*)"[ \t]*$')
included = set()
excluded = set()
def find_header(name, abs_path, include_paths):
samedir = path.join(path.dirname(abs_path), name)
if path.exists(samedir):
return samedir
for include_path in include_paths:
include_path = path.join(include_path, name)
if path.exists(include_path):
return include_path
return None
def expand_include(include_path, f, abs_path, source_out, header_out, include_paths, public_include_paths):
if include_path in included:
return False
included.add(include_path)
with open(include_path) as f:
print('#line 1 "{}"'.format(include_path), file=source_out)
process_file(f, include_path, source_out, header_out, include_paths, public_include_paths)
return True
def process_file(f, abs_path, source_out, header_out, include_paths, public_include_paths):
for (line, text) in enumerate(f):
m = include_re.match(text)
if m:
filename = m.groups()[0]
# first check private headers
include_path = find_header(filename, abs_path, include_paths)
if include_path:
if include_path in excluded:
source_out.write(text)
expanded = False
else:
expanded = expand_include(include_path, f, abs_path, source_out, header_out, include_paths, public_include_paths)
else:
# now try public headers
include_path = find_header(filename, abs_path, public_include_paths)
if include_path:
# found public header
expanded = False
if include_path in excluded:
source_out.write(text)
else:
expand_include(include_path, f, abs_path, header_out, None, public_include_paths, [])
else:
sys.exit("unable to find {}, included in {} on line {}".format(filename, abs_path, line))
if expanded:
print('#line {} "{}"'.format(line+1, abs_path), file=source_out)
elif text != "#pragma once\n":
source_out.write(text)
def main():
parser = argparse.ArgumentParser(description="Transform a unity build into an amalgamation")
parser.add_argument("source", help="source file")
parser.add_argument("-I", action="append", dest="include_paths", help="include paths for private headers")
parser.add_argument("-i", action="append", dest="public_include_paths", help="include paths for public headers")
parser.add_argument("-x", action="append", dest="excluded", help="excluded header files")
parser.add_argument("-o", dest="source_out", help="output C++ file", required=True)
parser.add_argument("-H", dest="header_out", help="output C++ header file", required=True)
args = parser.parse_args()
include_paths = list(map(path.abspath, args.include_paths or []))
public_include_paths = list(map(path.abspath, args.public_include_paths or []))
excluded.update(map(path.abspath, args.excluded or []))
filename = args.source
abs_path = path.abspath(filename)
with open(filename) as f, open(args.source_out, 'w') as source_out, open(args.header_out, 'w') as header_out:
print('#line 1 "{}"'.format(filename), file=source_out)
print('#include "{}"'.format(header_out.name), file=source_out)
process_file(f, abs_path, source_out, header_out, include_paths, public_include_paths)
if __name__ == "__main__":
main()

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,22 @@
#!/bin/sh
#
# Record the version of the source that we are compiling.
# We keep a record of the git revision in util/version.cc. This source file
# is then built as a regular source file as part of the compilation process.
# One can run "strings executable_filename | grep _build_" to find the version of
# the source that we used to build the executable file.
OUTFILE="$PWD/util/build_version.cc"
GIT_SHA=""
if command -v git >/dev/null 2>&1; then
GIT_SHA=$(git rev-parse HEAD 2>/dev/null)
fi
cat > "${OUTFILE}" <<EOF
#include "build_version.h"
const char* rocksdb_build_git_sha = "rocksdb_build_git_sha:${GIT_SHA}";
const char* rocksdb_build_git_datetime = "rocksdb_build_git_datetime:$(date)";
const char* rocksdb_build_compile_date = __DATE__;
const char* rocksdb_build_compile_time = __TIME__;
EOF

View File

@ -1,36 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Check for some simple mistakes that should prevent commit or push
BAD=""
git grep 'namespace rocksdb' -- '*.[ch]*'
if [ "$?" != "1" ]; then
echo "^^^^^ Do not hardcode namespace rocksdb. Use ROCKSDB_NAMESPACE"
BAD=1
fi
git grep -i 'nocommit' -- ':!build_tools/check-sources.sh'
if [ "$?" != "1" ]; then
echo "^^^^^ Code was not intended to be committed"
BAD=1
fi
git grep '<rocksdb/' -- ':!build_tools/check-sources.sh'
if [ "$?" != "1" ]; then
echo '^^^^^ Use double-quotes as in #include "rocksdb/something.h"'
BAD=1
fi
git grep 'using namespace' -- ':!build_tools' ':!docs' \
':!third-party/folly/folly/lang/Align.h' \
':!third-party/gtest-1.8.1/fused-src/gtest/gtest.h'
if [ "$?" != "1" ]; then
echo '^^^^ Do not use "using namespace"'
BAD=1
fi
if [ "$BAD" ]; then
exit 1
fi

View File

@ -0,0 +1,19 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
GCC_BASE=/mnt/gvfs/third-party2/gcc/7331085db891a2ef4a88a48a751d834e8d68f4cb/7.x/centos7-native/b2ef2b6
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/963d9aeda70cc4779885b1277484fe7544a04e3e/9.0.0/platform007/9e92d53/
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/6ace84e956873d53638c738b6f65f3f469cca74c/7.x/platform007/5620abc
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/192b0f42d63dcf6210d6ceae387b49af049e6e0c/2.26/platform007/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/7f9bdaada18f59bc27ec2b0871eb8a6144343aef/1.1.3/platform007/ca4da3d
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/2d9f0b9a4274cc21f61272a9e89bdb859bce8f1f/1.2.8/platform007/ca4da3d
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/dc49a21c5fceec6456a7a28a94dcd16690af1337/1.0.6/platform007/ca4da3d
LZ4_BASE=/mnt/gvfs/third-party2/lz4/0f607f8fc442ea7d6b876931b1898bb573d5e5da/1.9.1/platform007/ca4da3d
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/ca22bc441a4eb709e9e0b1f9fec9750fed7b31c5/1.4.x/platform007/15a3614
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/0b9929d2588991c65a57168bf88aff2db87c5d48/2.2.0/platform007/ca4da3d
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/c26f08f47ac35fc31da2633b7da92d6b863246eb/master/platform007/c26c002
NUMA_BASE=/mnt/gvfs/third-party2/numa/3f3fb57a5ccc5fd21c66416c0b83e0aa76a05376/2.0.11/platform007/ca4da3d
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/40c73d874898b386a71847f1b99115d93822d11f/1.4/platform007/6f3e0a9
TBB_BASE=/mnt/gvfs/third-party2/tbb/4ce8e8dba77cdbd81b75d6f0c32fd7a1b76a11ec/2018_U5/platform007/ca4da3d
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/fb251ecd2f5ae16f8671f7014c246e52a748fe0b/fb/platform007/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/ab9f09bba370e7066cafd4eb59752db93f2e8312/2.29.1/platform007/15a3614
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/d42d152a15636529b0861ec493927200ebebca8e/3.15.0/platform007/ca4da3d
LUA_BASE=/mnt/gvfs/third-party2/lua/f0cd714433206d5139df61659eb7b28b1dea6683/5.3.4/platform007/5007832

View File

@ -1,22 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
GCC_BASE=/mnt/gvfs/third-party2/gcc/1795efe5f06778c15a92c8f9a2aba5dc496d9d4d/9.x/centos7-native/3bed279
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/7318eaac22659b6ff2fe43918e4b69fd0772a8a7/9.0.0/platform009/651ee30
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/4959b39cfbe5965a37c861c4c327fa7c5c759b87/9.x/platform009/9202ce7
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/45ce3375cdc77ecb2520bbf8f0ecddd3f98efd7a/2.30/platform009/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/be4de3205e029101b18aa8103daa696c2bef3b19/1.1.3/platform009/7f3b187
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/3c160ac5c67e257501e24c6c1d00ad5e01d73db6/1.2.8/platform009/7f3b187
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/73a237ac5bc0a5f5d67b39b8d253cfebaab88684/1.0.6/platform009/7f3b187
LZ4_BASE=/mnt/gvfs/third-party2/lz4/ec6573523b0ce55ef6373a4801189027cf07bb2c/1.9.1/platform009/7f3b187
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/64c58a207d2495e83abc57a500a956df09b79a7c/1.4.x/platform009/ba86d1f
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/824d0a8a5abb5b121afd1b35fc3896407ea50092/2.2.0/platform009/7f3b187
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/d9aef9feb850b168a68736420f217b01cce11a89/master/platform009/c305944
NUMA_BASE=/mnt/gvfs/third-party2/numa/0af65f71e23a67bf65dc91b11f95caa39325c432/2.0.11/platform009/7f3b187
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/02486dac347645d31dce116f44e1de3177315be2/1.4/platform009/5191652
TBB_BASE=/mnt/gvfs/third-party2/tbb/2e0ec671e550bfca347300bf3f789d9c0fff24ad/2018_U5/platform009/7f3b187
LIBURING_BASE=/mnt/gvfs/third-party2/liburing/70dbd9cfee63a25611417d09433a86d7711b3990/20200729/platform009/7f3b187
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/32b8a2407b634df3f8f948ba373fc4acc6a18296/fb/platform009/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/08634589372fa5f237bfd374e8c644a8364e78c1/2.32/platform009/ba86d1f/
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/6ae525939ad02e5e676855082fbbc7828dbafeac/3.15.0/platform009/7f3b187
LUA_BASE=/mnt/gvfs/third-party2/lua/162efd9561a3d21f6869f4814011e9cf1b3ff4dc/5.3.4/platform009/a6271c4
BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/30bf49ad6414325e17f3425b0edcb64239427ae3/1.6.1/platform009/7f3b187
BOOST_BASE=/mnt/gvfs/third-party2/boost/201b7d74941e54b436dfa364a063aa6d2cd7de4c/1.69.0/platform009/8a7ffdf

View File

@ -1,22 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# The file is generated using update_dependencies.sh.
GCC_BASE=/mnt/gvfs/third-party2/gcc/e40bde78650fa91b8405a857e3f10bf336633fb0/11.x/centos7-native/886b5eb
CLANG_BASE=/mnt/gvfs/third-party2/llvm-fb/2043340983c032915adbb6f78903dc855b65aee8/12/platform010/9520e0f
LIBGCC_BASE=/mnt/gvfs/third-party2/libgcc/c00dcc6a3e4125c7e8b248e9a79c14b78ac9e0ca/11.x/platform010/5684a5a
GLIBC_BASE=/mnt/gvfs/third-party2/glibc/0b9c8e4b060eda62f3bc1c6127bbe1256697569b/2.34/platform010/f259413
SNAPPY_BASE=/mnt/gvfs/third-party2/snappy/bc9647f7912b131315827d65cb6189c21f381d05/1.1.3/platform010/76ebdda
ZLIB_BASE=/mnt/gvfs/third-party2/zlib/a6f5f3f1d063d2d00cd02fc12f0f05fc3ab3a994/1.2.11/platform010/76ebdda
BZIP2_BASE=/mnt/gvfs/third-party2/bzip2/09703139cfc376bd8a82642385a0e97726b28287/1.0.6/platform010/76ebdda
LZ4_BASE=/mnt/gvfs/third-party2/lz4/60220d6a5bf7722b9cc239a1368c596619b12060/1.9.1/platform010/76ebdda
ZSTD_BASE=/mnt/gvfs/third-party2/zstd/50eace8143eaaea9473deae1f3283e0049e05633/1.4.x/platform010/64091f4
GFLAGS_BASE=/mnt/gvfs/third-party2/gflags/5d27e5919771603da06000a027b12f799e58a4f7/2.2.0/platform010/76ebdda
JEMALLOC_BASE=/mnt/gvfs/third-party2/jemalloc/b62912d333ef33f9760efa6219dbe3fe6abb3b0e/master/platform010/f57cc4a
NUMA_BASE=/mnt/gvfs/third-party2/numa/6b412770957aa3c8a87e5e0dcd8cc2f45f393bc0/2.0.11/platform010/76ebdda
LIBUNWIND_BASE=/mnt/gvfs/third-party2/libunwind/52f69816e936e147664ad717eb71a1a0e9dc973a/1.4/platform010/5074a48
TBB_BASE=/mnt/gvfs/third-party2/tbb/c9cc192099fa84c0dcd0ffeedd44a373ad6e4925/2018_U5/platform010/76ebdda
LIBURING_BASE=/mnt/gvfs/third-party2/liburing/a98e2d137007e3ebf7f33bd6f99c2c56bdaf8488/20210212/platform010/76ebdda
BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/780c7a0f9cf0967961e69ad08e61cddd85d61821/trunk/platform010/76ebdda
KERNEL_HEADERS_BASE=/mnt/gvfs/third-party2/kernel-headers/02d9f76aaaba580611cf75e741753c800c7fdc12/fb/platform010/da39a3e
BINUTILS_BASE=/mnt/gvfs/third-party2/binutils/938dc3f064ef3a48c0446f5b11d788d50b3eb5ee/2.37/centos7-native/da39a3e
VALGRIND_BASE=/mnt/gvfs/third-party2/valgrind/429a6b3203eb415f1599bd15183659153129188e/3.15.0/platform010/76ebdda
LUA_BASE=/mnt/gvfs/third-party2/lua/363787fa5cac2a8aa20638909210443278fa138e/5.3.4/platform010/9079c97

View File

@ -1,3 +0,0 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
docker run -v $PWD:/rocks -w /rocks buildpack-deps make

View File

@ -1,177 +0,0 @@
# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
# This source code is licensed under both the GPLv2 (found in the
# COPYING file in the root directory) and Apache 2.0 License
# (found in the LICENSE.Apache file in the root directory).
'''Filter for error messages in test output:
- Receives merged stdout/stderr from test on stdin
- Finds patterns of known error messages for test name (first argument)
- Prints those error messages to stdout
'''
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import re
import sys
class ErrorParserBase(object):
def parse_error(self, line):
'''Parses a line of test output. If it contains an error, returns a
formatted message describing the error; otherwise, returns None.
Subclasses must override this method.
'''
raise NotImplementedError
class GTestErrorParser(ErrorParserBase):
'''A parser that remembers the last test that began running so it can print
that test's name upon detecting failure.
'''
_GTEST_NAME_PATTERN = re.compile(r'\[ RUN \] (\S+)$')
# format: '<filename or "unknown file">:<line #>: Failure'
_GTEST_FAIL_PATTERN = re.compile(r'(unknown file|\S+:\d+): Failure$')
def __init__(self):
self._last_gtest_name = 'Unknown test'
def parse_error(self, line):
gtest_name_match = self._GTEST_NAME_PATTERN.match(line)
if gtest_name_match:
self._last_gtest_name = gtest_name_match.group(1)
return None
gtest_fail_match = self._GTEST_FAIL_PATTERN.match(line)
if gtest_fail_match:
return '%s failed: %s' % (
self._last_gtest_name, gtest_fail_match.group(1))
return None
class MatchErrorParser(ErrorParserBase):
'''A simple parser that returns the whole line if it matches the pattern.
'''
def __init__(self, pattern):
self._pattern = re.compile(pattern)
def parse_error(self, line):
if self._pattern.match(line):
return line
return None
class CompilerErrorParser(MatchErrorParser):
def __init__(self):
# format (compile error):
# '<filename>:<line #>:<column #>: error: <error msg>'
# format (link error):
# '<filename>:<line #>: error: <error msg>'
# The below regex catches both
super(CompilerErrorParser, self).__init__(r'\S+:\d+: error:')
class ScanBuildErrorParser(MatchErrorParser):
def __init__(self):
super(ScanBuildErrorParser, self).__init__(
r'scan-build: \d+ bugs found.$')
class DbCrashErrorParser(MatchErrorParser):
def __init__(self):
super(DbCrashErrorParser, self).__init__(r'\*\*\*.*\^$|TEST FAILED.')
class WriteStressErrorParser(MatchErrorParser):
def __init__(self):
super(WriteStressErrorParser, self).__init__(
r'ERROR: write_stress died with exitcode=\d+')
class AsanErrorParser(MatchErrorParser):
def __init__(self):
super(AsanErrorParser, self).__init__(
r'==\d+==ERROR: AddressSanitizer:')
class UbsanErrorParser(MatchErrorParser):
def __init__(self):
# format: '<filename>:<line #>:<column #>: runtime error: <error msg>'
super(UbsanErrorParser, self).__init__(r'\S+:\d+:\d+: runtime error:')
class ValgrindErrorParser(MatchErrorParser):
def __init__(self):
# just grab the summary, valgrind doesn't clearly distinguish errors
# from other log messages.
super(ValgrindErrorParser, self).__init__(r'==\d+== ERROR SUMMARY:')
class CompatErrorParser(MatchErrorParser):
def __init__(self):
super(CompatErrorParser, self).__init__(r'==== .*[Ee]rror.* ====$')
class TsanErrorParser(MatchErrorParser):
def __init__(self):
super(TsanErrorParser, self).__init__(r'WARNING: ThreadSanitizer:')
_TEST_NAME_TO_PARSERS = {
'punit': [CompilerErrorParser, GTestErrorParser],
'unit': [CompilerErrorParser, GTestErrorParser],
'release': [CompilerErrorParser, GTestErrorParser],
'unit_481': [CompilerErrorParser, GTestErrorParser],
'release_481': [CompilerErrorParser, GTestErrorParser],
'clang_unit': [CompilerErrorParser, GTestErrorParser],
'clang_release': [CompilerErrorParser, GTestErrorParser],
'clang_analyze': [CompilerErrorParser, ScanBuildErrorParser],
'code_cov': [CompilerErrorParser, GTestErrorParser],
'unity': [CompilerErrorParser, GTestErrorParser],
'lite': [CompilerErrorParser],
'lite_test': [CompilerErrorParser, GTestErrorParser],
'stress_crash': [CompilerErrorParser, DbCrashErrorParser],
'stress_crash_with_atomic_flush': [CompilerErrorParser, DbCrashErrorParser],
'stress_crash_with_txn': [CompilerErrorParser, DbCrashErrorParser],
'write_stress': [CompilerErrorParser, WriteStressErrorParser],
'asan': [CompilerErrorParser, GTestErrorParser, AsanErrorParser],
'asan_crash': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
'asan_crash_with_atomic_flush': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
'asan_crash_with_txn': [CompilerErrorParser, AsanErrorParser, DbCrashErrorParser],
'ubsan': [CompilerErrorParser, GTestErrorParser, UbsanErrorParser],
'ubsan_crash': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
'ubsan_crash_with_atomic_flush': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
'ubsan_crash_with_txn': [CompilerErrorParser, UbsanErrorParser, DbCrashErrorParser],
'valgrind': [CompilerErrorParser, GTestErrorParser, ValgrindErrorParser],
'tsan': [CompilerErrorParser, GTestErrorParser, TsanErrorParser],
'format_compatible': [CompilerErrorParser, CompatErrorParser],
'run_format_compatible': [CompilerErrorParser, CompatErrorParser],
'no_compression': [CompilerErrorParser, GTestErrorParser],
'run_no_compression': [CompilerErrorParser, GTestErrorParser],
'regression': [CompilerErrorParser],
'run_regression': [CompilerErrorParser],
}
def main():
if len(sys.argv) != 2:
return 'Usage: %s <test name>' % sys.argv[0]
test_name = sys.argv[1]
if test_name not in _TEST_NAME_TO_PARSERS:
return 'Unknown test name: %s' % test_name
error_parsers = []
for parser_cls in _TEST_NAME_TO_PARSERS[test_name]:
error_parsers.append(parser_cls())
for line in sys.stdin:
line = line.strip()
for error_parser in error_parsers:
error_msg = error_parser.parse_error(line)
if error_msg is not None:
print(error_msg)
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,55 +0,0 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# fail early
set -e
if test -z $ROCKSDB_PATH; then
ROCKSDB_PATH=~/rocksdb
fi
source $ROCKSDB_PATH/build_tools/fbcode_config4.8.1.sh
EXTRA_LDFLAGS=""
if test -z $ALLOC; then
# default
ALLOC=tcmalloc
elif [[ $ALLOC == "jemalloc" ]]; then
ALLOC=system
EXTRA_LDFLAGS+=" -Wl,--whole-archive $JEMALLOC_LIB -Wl,--no-whole-archive"
fi
# we need to force mongo to use static library, not shared
STATIC_LIB_DEP_DIR='build/static_library_dependencies'
test -d $STATIC_LIB_DEP_DIR || mkdir $STATIC_LIB_DEP_DIR
test -h $STATIC_LIB_DEP_DIR/`basename $SNAPPY_LIBS` || ln -s $SNAPPY_LIBS $STATIC_LIB_DEP_DIR
test -h $STATIC_LIB_DEP_DIR/`basename $LZ4_LIBS` || ln -s $LZ4_LIBS $STATIC_LIB_DEP_DIR
EXTRA_LDFLAGS+=" -L $STATIC_LIB_DEP_DIR"
set -x
EXTRA_CMD=""
if ! test -e version.json; then
# this is Mongo 3.0
EXTRA_CMD="--rocksdb \
--variant-dir=linux2/norm
--cxx=${CXX} \
--cc=${CC} \
--use-system-zlib" # add this line back to normal code path
# when https://jira.mongodb.org/browse/SERVER-19123 is resolved
fi
scons \
LINKFLAGS="$EXTRA_LDFLAGS $EXEC_LDFLAGS $PLATFORM_LDFLAGS" \
CCFLAGS="$CXXFLAGS -L $STATIC_LIB_DEP_DIR" \
LIBS="lz4 gcc stdc++" \
LIBPATH="$ROCKSDB_PATH" \
CPPPATH="$ROCKSDB_PATH/include" \
-j32 \
--allocator=$ALLOC \
--nostrip \
--opt=on \
--disable-minimum-compiler-version-enforcement \
--use-system-snappy \
--disable-warnings-as-errors \
$EXTRA_CMD $*

View File

@ -1,5 +1,4 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ and clang compilers and also
@ -14,54 +13,40 @@ source "$BASEDIR/dependencies.sh"
CFLAGS=""
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include"
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/7.3.0"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
if test -z $PIC_BUILD; then
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
else
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
fi
CFLAGS+=" -DSNAPPY"
if test -z $PIC_BUILD; then
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
fi
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
CFLAGS+=" -DZLIB"
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
fi
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
CFLAGS+=" -DBZIP2"
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
fi
fi
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
CFLAGS+=" -DLZ4"
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
if test -z $PIC_BUILD; then
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
else
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
fi
CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
CFLAGS+=" -DZSTD"
fi
# location of gflags headers and libraries
@ -96,10 +81,8 @@ else
fi
CFLAGS+=" -DTBB"
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
# use Intel SSE support for checksum calculations
export USE_SSE=" -msse -msse4.2 "
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
@ -119,24 +102,22 @@ if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS/gold"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
JEMALLOC=1
else
# clang
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -B$BINUTILS/gold -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/5.x/x86_64-facebook-linux "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/7.x/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
@ -152,9 +133,9 @@ CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
EXEC_LDFLAGS+=" -B$BINUTILS/gold"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/gcc-5-glibc-2.23/lib/ld.so"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform007/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/gcc-5-glibc-2.23/lib"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform007/lib"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
@ -164,12 +145,4 @@ EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GF
VALGRIND_VER="$VALGRIND_BASE/bin/"
LUA_PATH="$LUA_BASE"
if test -z $PIC_BUILD; then
LUA_LIB=" $LUA_PATH/lib/liblua.a"
else
LUA_LIB=" $LUA_PATH/lib/liblua_pic.a"
fi
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB
export CC CXX AR CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD

View File

@ -1,169 +0,0 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ and clang compilers and also
# uses jemalloc
# Environment variables that change the behavior of this script:
# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_platform009.sh"
CFLAGS=""
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/9.3.0"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
if test -z $PIC_BUILD; then
MAYBE_PIC=
else
MAYBE_PIC=_pic
fi
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
CFLAGS+=" -DSNAPPY"
fi
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
CFLAGS+=" -DLZ4"
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
CFLAGS+=" -DZSTD"
fi
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags${MAYBE_PIC}.a"
CFLAGS+=" -DGFLAGS=gflags"
BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/"
BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a"
BOOST_INCLUDE=" -I $BOOST_BASE/include/"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma${MAYBE_PIC}.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind${MAYBE_PIC}.a"
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
TBB_LIBS="$TBB_BASE/lib/libtbb${MAYBE_PIC}.a"
CFLAGS+=" -DTBB"
# location of LIBURING
LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/"
LIBURING_LIBS="$LIBURING_BASE/lib/liburing${MAYBE_PIC}.a"
CFLAGS+=" -DLIBURING"
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
AS="$BINUTILS/as"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE $BOOST_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/clang/tools/scan-build/bin/scan-build"
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
JEMALLOC=1
else
# clang
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/9.x "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/9.x/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CFLAGS+=" -Wno-expansion-to-defined "
CXXFLAGS="-nostdinc++"
fi
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform009/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform009/lib"
EXEC_LDFLAGS+=" -Wl,-rpath=$GCC_BASE/lib64"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
PLATFORM_LDFLAGS+=" -B$BINUTILS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
# lua not supported because it's on track for deprecation, I think
LUA_PATH=
LUA_LIB=
export CC CXX AR AS CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

View File

@ -1,175 +0,0 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Set environment variables so that we can compile rocksdb using
# fbcode settings. It uses the latest g++ and clang compilers and also
# uses jemalloc
# Environment variables that change the behavior of this script:
# PIC_BUILD -- if true, it will only take pic versions of libraries from fbcode. libraries that don't have pic variant will not be included
BASEDIR=`dirname $BASH_SOURCE`
source "$BASEDIR/dependencies_platform010.sh"
# Disallow using libraries from default locations as they might not be compatible with platform010 libraries.
CFLAGS=" --sysroot=/DOES/NOT/EXIST"
# libgcc
LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/trunk"
LIBGCC_LIBS=" -L $LIBGCC_BASE/lib -B$LIBGCC_BASE/lib/gcc/x86_64-facebook-linux/trunk/"
# glibc
GLIBC_INCLUDE="$GLIBC_BASE/include"
GLIBC_LIBS=" -L $GLIBC_BASE/lib"
GLIBC_LIBS+=" -B$GLIBC_BASE/lib"
if test -z $PIC_BUILD; then
MAYBE_PIC=
else
MAYBE_PIC=_pic
fi
if ! test $ROCKSDB_DISABLE_SNAPPY; then
# snappy
SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
CFLAGS+=" -DSNAPPY"
fi
if ! test $ROCKSDB_DISABLE_ZLIB; then
# location of zlib headers and libraries
ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
CFLAGS+=" -DZLIB"
fi
if ! test $ROCKSDB_DISABLE_BZIP; then
# location of bzip headers and libraries
BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
CFLAGS+=" -DBZIP2"
fi
if ! test $ROCKSDB_DISABLE_LZ4; then
LZ4_INCLUDE=" -I $LZ4_BASE/include/"
LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
CFLAGS+=" -DLZ4"
fi
if ! test $ROCKSDB_DISABLE_ZSTD; then
ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
CFLAGS+=" -DZSTD"
fi
# location of gflags headers and libraries
GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
GFLAGS_LIBS=" $GFLAGS_BASE/lib/libgflags${MAYBE_PIC}.a"
CFLAGS+=" -DGFLAGS=gflags"
BENCHMARK_INCLUDE=" -I $BENCHMARK_BASE/include/"
BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/libbenchmark${MAYBE_PIC}.a"
# location of jemalloc
JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
# location of numa
NUMA_INCLUDE=" -I $NUMA_BASE/include/"
NUMA_LIB=" $NUMA_BASE/lib/libnuma${MAYBE_PIC}.a"
CFLAGS+=" -DNUMA"
# location of libunwind
LIBUNWIND="$LIBUNWIND_BASE/lib/libunwind${MAYBE_PIC}.a"
# location of TBB
TBB_INCLUDE=" -isystem $TBB_BASE/include/"
TBB_LIBS="$TBB_BASE/lib/libtbb${MAYBE_PIC}.a"
CFLAGS+=" -DTBB"
# location of LIBURING
LIBURING_INCLUDE=" -isystem $LIBURING_BASE/include/"
LIBURING_LIBS="$LIBURING_BASE/lib/liburing${MAYBE_PIC}.a"
CFLAGS+=" -DLIBURING"
test "$USE_SSE" || USE_SSE=1
export USE_SSE
test "$PORTABLE" || PORTABLE=1
export PORTABLE
BINUTILS="$BINUTILS_BASE/bin"
AR="$BINUTILS/ar"
AS="$BINUTILS/as"
DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE"
STDLIBS="-L $GCC_BASE/lib64"
CLANG_BIN="$CLANG_BASE/bin"
CLANG_LIB="$CLANG_BASE/lib"
CLANG_SRC="$CLANG_BASE/../../src"
CLANG_ANALYZER="$CLANG_BIN/clang++"
CLANG_SCAN_BUILD="$CLANG_SRC/llvm/clang/tools/scan-build/bin/scan-build"
if [ -z "$USE_CLANG" ]; then
# gcc
CC="$GCC_BASE/bin/gcc"
CXX="$GCC_BASE/bin/g++"
AR="$GCC_BASE/bin/gcc-ar"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -I$GCC_BASE/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/install-tools/include"
CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include-fixed/"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -I$GLIBC_INCLUDE"
CFLAGS+=" -I$LIBGCC_BASE/include"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/x86_64-facebook-linux/"
CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/backward"
CFLAGS+=" -isystem $GLIBC_INCLUDE -I$GLIBC_INCLUDE"
JEMALLOC=1
else
# clang
CLANG_INCLUDE="$CLANG_LIB/clang/stable/include"
CC="$CLANG_BIN/clang"
CXX="$CLANG_BIN/clang++"
AR="$CLANG_BIN/llvm-ar"
CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk "
CFLAGS+=" -isystem $LIBGCC_BASE/include/c++/trunk/x86_64-facebook-linux "
CFLAGS+=" -isystem $GLIBC_INCLUDE"
CFLAGS+=" -isystem $LIBGCC_INCLUDE"
CFLAGS+=" -isystem $CLANG_INCLUDE"
CFLAGS+=" -Wno-expansion-to-defined "
CXXFLAGS="-nostdinc++"
fi
KERNEL_HEADERS_INCLUDE="$KERNEL_HEADERS_BASE/include"
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE/linux "
CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
CFLAGS+=" $DEPS_INCLUDE"
CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
CXXFLAGS+=" $CFLAGS"
EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
EXEC_LDFLAGS+=" -Wl,--dynamic-linker,/usr/local/fbcode/platform010/lib/ld.so"
EXEC_LDFLAGS+=" $LIBUNWIND"
EXEC_LDFLAGS+=" -Wl,-rpath=/usr/local/fbcode/platform010/lib"
EXEC_LDFLAGS+=" -Wl,-rpath=$GCC_BASE/lib64"
# required by libtbb
EXEC_LDFLAGS+=" -ldl"
PLATFORM_LDFLAGS="$LIBGCC_LIBS $GLIBC_LIBS $STDLIBS -lgcc -lstdc++"
PLATFORM_LDFLAGS+=" -B$BINUTILS"
EXEC_LDFLAGS_SHARED="$SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
VALGRIND_VER="$VALGRIND_BASE/bin/"
export CC CXX AR AS CFLAGS CXXFLAGS EXEC_LDFLAGS EXEC_LDFLAGS_SHARED VALGRIND_VER JEMALLOC_LIB JEMALLOC_INCLUDE CLANG_ANALYZER CLANG_SCAN_BUILD LUA_PATH LUA_LIB

View File

@ -1,102 +1,33 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#!/bin/bash
# If clang_format_diff.py command is not specfied, we assume we are able to
# access directly without any path.
if [ -z $CLANG_FORMAT_DIFF ]
then
CLANG_FORMAT_DIFF="clang-format-diff.py"
fi
print_usage () {
echo "Usage:"
echo "format-diff.sh [OPTIONS]"
echo "-c: check only."
echo "-h: print this message."
}
# Check clang-format-diff.py
if ! which $CLANG_FORMAT_DIFF &> /dev/null
then
echo "You didn't have clang-format-diff.py available in your computer!"
echo "You can download it by running: "
echo " curl http://goo.gl/iUW1u2"
exit 128
fi
while getopts ':ch' OPTION; do
case "$OPTION" in
c)
CHECK_ONLY=1
;;
h)
print_usage
exit 1
;;
?)
print_usage
exit 1
;;
esac
done
# Check argparse, a library that clang-format-diff.py requires.
python 2>/dev/null << EOF
import argparse
EOF
REPO_ROOT="$(git rev-parse --show-toplevel)"
if [ "$CLANG_FORMAT_DIFF" ]; then
echo "Note: CLANG_FORMAT_DIFF='$CLANG_FORMAT_DIFF'"
# Dry run to confirm dependencies like argparse
if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then
true #Good
else
exit 128
fi
else
# First try directly executing the possibilities
if clang-format-diff --help &> /dev/null < /dev/null; then
CLANG_FORMAT_DIFF=clang-format-diff
elif clang-format-diff.py --help &> /dev/null < /dev/null; then
CLANG_FORMAT_DIFF=clang-format-diff.py
elif $REPO_ROOT/clang-format-diff.py --help &> /dev/null < /dev/null; then
CLANG_FORMAT_DIFF=$REPO_ROOT/clang-format-diff.py
else
# This probably means we need to directly invoke the interpreter.
# But first find clang-format-diff.py
if [ -f "$REPO_ROOT/clang-format-diff.py" ]; then
CFD_PATH="$REPO_ROOT/clang-format-diff.py"
elif which clang-format-diff.py &> /dev/null; then
CFD_PATH="$(which clang-format-diff.py)"
else
echo "You didn't have clang-format-diff.py and/or clang-format available in your computer!"
echo "You can download clang-format-diff.py by running: "
echo " curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py"
echo "You should make sure the downloaded script is not compromised."
echo "You can download clang-format by running:"
echo " brew install clang-format"
echo " Or"
echo " apt install clang-format"
echo " This might work too:"
echo " yum install git-clang-format"
echo "Then make sure clang-format is available and executable from \$PATH:"
echo " clang-format --version"
exit 128
fi
# Check argparse pre-req on interpreter, or it will fail
if echo import argparse | ${PYTHON:-python3}; then
true # Good
else
echo "To run clang-format-diff.py, we'll need the library "argparse" to be"
echo "installed. You can try either of the follow ways to install it:"
echo " 1. Manually download argparse: https://pypi.python.org/pypi/argparse"
echo " 2. easy_install argparse (if you have easy_install)"
echo " 3. pip install argparse (if you have pip)"
exit 129
fi
# Unfortunately, some machines have a Python2 clang-format-diff.py
# installed but only a Python3 interpreter installed. Unfortunately,
# automatic 2to3 migration is insufficient, so suggest downloading latest.
if grep -q "print '" "$CFD_PATH" && \
${PYTHON:-python3} --version | grep -q 'ython 3'; then
echo "You have clang-format-diff.py for Python 2 but are using a Python 3"
echo "interpreter (${PYTHON:-python3})."
echo "You can download clang-format-diff.py for Python 3 by running: "
echo " curl --location https://raw.githubusercontent.com/llvm/llvm-project/main/clang/tools/clang-format/clang-format-diff.py -o ${REPO_ROOT}/clang-format-diff.py"
echo "You should make sure the downloaded script is not compromised."
exit 130
fi
CLANG_FORMAT_DIFF="${PYTHON:-python3} $CFD_PATH"
# This had better work after all those checks
if $CLANG_FORMAT_DIFF --help >/dev/null < /dev/null; then
true #Good
else
exit 128
fi
fi
if [ "$?" != 0 ]
then
echo "To run clang-format-diff.py, we'll need the library "argparse" to be"
echo "installed. You can try either of the follow ways to install it:"
echo " 1. Manually download argparse: https://pypi.python.org/pypi/argparse"
echo " 2. easy_install argparse (if you have easy_install)"
echo " 3. pip install argparse (if you have pip)"
exit 129
fi
# TODO(kailiu) following work is not complete since we still need to figure
@ -121,60 +52,33 @@ set -e
uncommitted_code=`git diff HEAD`
# If there's no uncommitted changes, we assume user are doing post-commit
# format check, in which case we'll try to check the modified lines vs. the
# facebook/rocksdb.git main branch. Otherwise, we'll check format of the
# uncommitted code only.
# format check, in which case we'll check the modified lines from latest commit.
# Otherwise, we'll check format of the uncommitted code only.
if [ -z "$uncommitted_code" ]
then
# Attempt to get name of facebook/rocksdb.git remote.
[ "$FORMAT_REMOTE" ] || FORMAT_REMOTE="$(LC_ALL=POSIX LANG=POSIX git remote -v | grep 'facebook/rocksdb.git' | head -n 1 | cut -f 1)"
# Fall back on 'origin' if that fails
[ "$FORMAT_REMOTE" ] || FORMAT_REMOTE=origin
# Use main branch from that remote
[ "$FORMAT_UPSTREAM" ] || FORMAT_UPSTREAM="$FORMAT_REMOTE/$(LC_ALL=POSIX LANG=POSIX git remote show $FORMAT_REMOTE | sed -n '/HEAD branch/s/.*: //p')"
# Get the common ancestor with that remote branch. Everything after that
# common ancestor would be considered the contents of a pull request, so
# should be relevant for formatting fixes.
FORMAT_UPSTREAM_MERGE_BASE="$(git merge-base "$FORMAT_UPSTREAM" HEAD)"
# Get the differences
diffs=$(git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -p 1)
echo "Checking format of changes not yet in $FORMAT_UPSTREAM..."
# Check the format of last commit
diffs=$(git diff -U0 HEAD^ | $CLANG_FORMAT_DIFF -p 1)
else
# Check the format of uncommitted lines,
diffs=$(git diff -U0 HEAD | $CLANG_FORMAT_DIFF -p 1)
echo "Checking format of uncommitted changes..."
fi
if [ -z "$diffs" ]
then
echo "Nothing needs to be reformatted!"
exit 0
elif [ $CHECK_ONLY ]
then
echo "Your change has unformatted code. Please run make format!"
if [ $VERBOSE_CHECK ]; then
clang-format --version
echo "$diffs"
fi
exit 1
fi
# Highlight the insertion/deletion from the clang-format-diff.py's output
COLOR_END="\033[0m"
COLOR_RED="\033[0;31m"
COLOR_GREEN="\033[0;32m"
COLOR_RED="\033[0;31m"
COLOR_GREEN="\033[0;32m"
echo -e "Detect lines that doesn't follow the format rules:\r"
# Add the color to the diff. lines added will be green; lines removed will be red.
echo "$diffs" |
echo "$diffs" |
sed -e "s/\(^-.*$\)/`echo -e \"$COLOR_RED\1$COLOR_END\"`/" |
sed -e "s/\(^+.*$\)/`echo -e \"$COLOR_GREEN\1$COLOR_END\"`/"
if [[ "$OPT" == *"-DTRAVIS"* ]]
then
exit 1
fi
echo -e "Would you like to fix the format automatically (y/n): \c"
# Make sure under any mode, we can read user input.
@ -187,12 +91,7 @@ then
fi
# Do in-place format adjustment.
if [ -z "$uncommitted_code" ]
then
git diff -U0 "$FORMAT_UPSTREAM_MERGE_BASE" | $CLANG_FORMAT_DIFF -i -p 1
else
git diff -U0 HEAD | $CLANG_FORMAT_DIFF -i -p 1
fi
git diff -U0 HEAD^ | $CLANG_FORMAT_DIFF -i -p 1
echo "Files reformatted!"
# Amend to last commit if user do the post-commit format check

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,25 @@
#!/bin/sh
# Install gflags for mac developers.
set -e
DIR=`mktemp -d /tmp/rocksdb_gflags_XXXX`
cd $DIR
wget https://gflags.googlecode.com/files/gflags-2.0.tar.gz
tar xvfz gflags-2.0.tar.gz
cd gflags-2.0
./configure
make
make install
# Add include/lib path for g++
echo 'export LIBRARY_PATH+=":/usr/local/lib"' >> ~/.bash_profile
echo 'export CPATH+=":/usr/local/include"' >> ~/.bash_profile
echo ""
echo "-----------------------------------------------------------------------------"
echo "| Installation Completed |"
echo "-----------------------------------------------------------------------------"
echo "Please run \`. ~/.bash_profile\` to be able to compile with gflags"

46
build_tools/make_new_version.sh Executable file
View File

@ -0,0 +1,46 @@
#!/bin/bash
# Copyright (c) 2013, Facebook, Inc. All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
set -e
if [ -z "$GIT" ]
then
GIT="git"
fi
# Print out the colored progress info so that it can be brainlessly
# distinguished by users.
function title() {
echo -e "\033[1;32m$*\033[0m"
}
usage="Create new RocksDB version and prepare it for the release process\n"
usage+="USAGE: ./make_new_version.sh <version>"
# -- Pre-check
if [[ $# < 1 ]]; then
echo -e $usage
exit 1
fi
ROCKSDB_VERSION=$1
GIT_BRANCH=`git rev-parse --abbrev-ref HEAD`
echo $GIT_BRANCH
if [ $GIT_BRANCH != "master" ]; then
echo "Error: Current branch is '$GIT_BRANCH', Please switch to master branch."
exit 1
fi
title "Adding new tag for this release ..."
BRANCH="$ROCKSDB_VERSION.fb"
$GIT co -b $BRANCH
# Setting up the proxy for remote repo access
title "Pushing new branch to remote repo ..."
git push origin --set-upstream $BRANCH
title "Branch $BRANCH is pushed to github;"

View File

@ -1,6 +1,4 @@
# shellcheck disable=SC1113
#/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
set -e
@ -30,14 +28,12 @@ function package() {
if dpkg --get-selections | grep --quiet $1; then
log "$1 is already installed. skipping."
else
# shellcheck disable=SC2068
apt-get install $@ -y
fi
elif [[ $OS = "centos" ]]; then
if rpm -qa | grep --quiet $1; then
log "$1 is already installed. skipping."
else
# shellcheck disable=SC2068
yum install $@ -y
fi
fi
@ -56,7 +52,6 @@ function gem_install() {
if gem list | grep --quiet $1; then
log "$1 is already installed. skipping."
else
# shellcheck disable=SC2068
gem install $@
fi
}
@ -70,8 +65,8 @@ function main() {
if [[ -d /vagrant ]]; then
if [[ $OS = "ubuntu" ]]; then
package g++-4.8
export CXX=g++-4.8
package g++-4.7
export CXX=g++-4.7
# the deb would depend on libgflags2, but the static lib is the only thing
# installed by make install
@ -103,27 +98,19 @@ function main() {
gem_install fpm
make static_lib
LIBDIR=/usr/lib
if [[ $FPM_OUTPUT = "rpm" ]]; then
LIBDIR=$(rpm --eval '%_libdir')
fi
rm -rf package
make install DESTDIR=package PREFIX=/usr LIBDIR=$LIBDIR
make install INSTALL_PATH=package
fpm \
-s dir \
-t $FPM_OUTPUT \
-C package \
-n rocksdb \
-v $1 \
--prefix /usr \
--url http://rocksdb.org/ \
-m rocksdb@fb.com \
--license BSD \
--vendor Facebook \
--description "RocksDB is an embeddable persistent key-value store for fast storage." \
usr
package
}
# shellcheck disable=SC2068
main $@

View File

@ -1,5 +1,4 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#!/bin/bash
set -e
@ -20,11 +19,26 @@ STAT_FILE=${STAT_FILE:-$(mktemp -t -u rocksdb_test_stats_XXXX)}
function cleanup {
rm -rf $DATA_DIR
rm -f $STAT_FILE.*
rm -f $STAT_FILE.fillseq
rm -f $STAT_FILE.readrandom
rm -f $STAT_FILE.overwrite
rm -f $STAT_FILE.memtablefillreadrandom
}
trap cleanup EXIT
if [ -z $GIT_BRANCH ]; then
git_br=`git rev-parse --abbrev-ref HEAD`
else
git_br=$(basename $GIT_BRANCH)
fi
if [ $git_br == "master" ]; then
git_br=""
else
git_br="."$git_br
fi
make release
# measure fillseq + fill up the DB for overwrite benchmark
@ -41,6 +55,7 @@ make release
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 > ${STAT_FILE}.fillseq
@ -58,6 +73,7 @@ make release
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=8 > ${STAT_FILE}.overwrite
@ -76,6 +92,7 @@ make release
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=1 > /dev/null
@ -94,6 +111,7 @@ make release
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readrandom
@ -113,6 +131,7 @@ make release
--use_tailing_iterator=1 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readrandomtailing
@ -131,6 +150,7 @@ make release
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readrandomsmallblockcache
@ -151,6 +171,7 @@ make release
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readrandom_mem_sst
@ -170,6 +191,7 @@ make release
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=1 > /dev/null
@ -188,6 +210,7 @@ make release
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > /dev/null
@ -207,6 +230,7 @@ make release
--disable_auto_compactions=1 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readrandom_filluniquerandom
@ -219,7 +243,7 @@ make release
--bloom_bits=10 \
--num=$((NUM / 4)) \
--reads=$((NUM / 4)) \
--benchmark_write_rate_limit=$(( 110 * 1024 )) \
--writes_per_second=1000 \
--write_buffer_size=100000000 \
--cache_size=6442450944 \
--cache_numshardbits=6 \
@ -227,6 +251,7 @@ make release
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=16 > ${STAT_FILE}.readwhilewriting
@ -245,6 +270,7 @@ make release
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--value_size=10 \
@ -258,6 +284,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--value_size=100 \
--compression_type=none \
--compression_ratio=1 \
--hard_rate_limit=2 \
--write_buffer_size=134217728 \
--max_write_buffer_number=4 \
--level0_file_num_compaction_trigger=8 \
@ -268,12 +295,15 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--disable_wal=0 \
--wal_dir=/dev/shm/rocksdb \
--sync=0 \
--disable_data_sync=1 \
--verify_checksum=1 \
--delete_obsolete_files_period_micros=314572800 \
--max_grandparent_overlap_factor=10 \
--use_plain_table=1 \
--open_files=-1 \
--mmap_read=1 \
--mmap_write=0 \
--memtablerep=prefix_hash \
--bloom_bits=10 \
--bloom_locality=1 \
--perf_level=0"
@ -299,7 +329,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--use_existing_db=1 \
--duration=600 \
--threads=32 \
--benchmark_write_rate_limit=9502720 > ${STAT_FILE}.readwhilewriting_in_ram
--writes_per_second=81920 > ${STAT_FILE}.readwhilewriting_in_ram
# Seekrandomwhilewriting
./db_bench \
@ -312,7 +342,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--use_tailing_iterator=1 \
--duration=600 \
--threads=32 \
--benchmark_write_rate_limit=9502720 > ${STAT_FILE}.seekwhilewriting_in_ram
--writes_per_second=81920 > ${STAT_FILE}.seekwhilewriting_in_ram
# measure fillseq with bunch of column families
./db_bench \
@ -326,6 +356,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 > ${STAT_FILE}.fillseq_lots_column_families
@ -341,6 +372,7 @@ common_in_mem_args="--db=/dev/shm/rocksdb \
--open_files=55000 \
--statistics=1 \
--histogram=1 \
--disable_data_sync=1 \
--disable_wal=1 \
--sync=0 \
--threads=8 > ${STAT_FILE}.overwrite_lots_column_families
@ -360,7 +392,7 @@ function send_to_ods {
echo >&2 "ERROR: Key $key doesn't have a value."
return
fi
curl --silent "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build&key=$key&value=$value" \
curl -s "https://www.intern.facebook.com/intern/agent/ods_set.php?entity=rocksdb_build$git_br&key=$key&value=$value" \
--connect-timeout 60
}

View File

@ -1,493 +0,0 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# This script enables you running RocksDB tests by running
# All the tests concurrently and utilizing all the cores
Param(
[switch]$EnableJE = $false, # Look for and use test executable, append _je to listed exclusions
[switch]$RunAll = $false, # Will attempt discover all *_test[_je].exe binaries and run all
# of them as Google suites. I.e. It will run test cases concurrently
# except those mentioned as $Run, those will run as individual test cases
# And any execlued with $ExcludeExes or $ExcludeCases
# It will also not run any individual test cases
# excluded but $ExcludeCasese
[switch]$RunAllExe = $false, # Look for and use test exdcutables, append _je to exclusions automatically
# It will attempt to run them in parallel w/o breaking them up on individual
# test cases. Those listed with $ExcludeExes will be excluded
[string]$SuiteRun = "", # Split test suites in test cases and run in parallel, not compatible with $RunAll
[string]$Run = "", # Run specified executables in parallel but do not split to test cases
[string]$ExcludeCases = "", # Exclude test cases, expects a comma separated list, no spaces
# Takes effect when $RunAll or $SuiteRun is specified. Must have full
# Test cases name including a group and a parameter if any
[string]$ExcludeExes = "", # Exclude exes from consideration, expects a comma separated list,
# no spaces. Takes effect only when $RunAll is specified
[string]$WorkFolder = "", # Direct tests to use that folder. SSD or Ram drive are better options.
# Number of async tasks that would run concurrently. Recommend a number below 64.
# However, CPU utlization really depends on the storage media. Recommend ram based disk.
# a value of 1 will run everything serially
[int]$Concurrency = 8,
[int]$Limit = -1 # -1 means do not limit for test purposes
)
# Folders and commands must be fullpath to run assuming
# the current folder is at the root of the git enlistment
$StartDate = (Get-Date)
$StartDate
$DebugPreference = "Continue"
# These tests are not google test suites and we should guard
# Against running them as suites
$RunOnly = New-Object System.Collections.Generic.HashSet[string]
$RunOnly.Add("c_test") | Out-Null
$RunOnly.Add("compact_on_deletion_collector_test") | Out-Null
$RunOnly.Add("merge_test") | Out-Null
$RunOnly.Add("stringappend_test") | Out-Null # Apparently incorrectly written
$RunOnly.Add("backup_engine_test") | Out-Null # Disabled
$RunOnly.Add("timer_queue_test") | Out-Null # Not a gtest
if($RunAll -and $SuiteRun -ne "") {
Write-Error "$RunAll and $SuiteRun are not compatible"
exit 1
}
if($RunAllExe -and $Run -ne "") {
Write-Error "$RunAllExe and $Run are not compatible"
exit 1
}
# If running under Appveyor assume that root
[string]$Appveyor = $Env:APPVEYOR_BUILD_FOLDER
if($Appveyor -ne "") {
$RootFolder = $Appveyor
} else {
$RootFolder = $PSScriptRoot -replace '\\build_tools', ''
}
$LogFolder = -Join($RootFolder, "\db_logs\")
$BinariesFolder = -Join($RootFolder, "\build\Debug\")
if($WorkFolder -eq "") {
# If TEST_TMPDIR is set use it
[string]$var = $Env:TEST_TMPDIR
if($var -eq "") {
$WorkFolder = -Join($RootFolder, "\db_tests\")
$Env:TEST_TMPDIR = $WorkFolder
} else {
$WorkFolder = $var
}
} else {
# Override from a command line
$Env:TEST_TMPDIR = $WorkFolder
}
Write-Output "Root: $RootFolder, WorkFolder: $WorkFolder"
Write-Output "BinariesFolder: $BinariesFolder, LogFolder: $LogFolder"
# Create test directories in the current folder
md -Path $WorkFolder -ErrorAction Ignore | Out-Null
md -Path $LogFolder -ErrorAction Ignore | Out-Null
$ExcludeCasesSet = New-Object System.Collections.Generic.HashSet[string]
if($ExcludeCases -ne "") {
Write-Host "ExcludeCases: $ExcludeCases"
$l = $ExcludeCases -split ' '
ForEach($t in $l) {
$ExcludeCasesSet.Add($t) | Out-Null
}
}
$ExcludeExesSet = New-Object System.Collections.Generic.HashSet[string]
if($ExcludeExes -ne "") {
Write-Host "ExcludeExe: $ExcludeExes"
$l = $ExcludeExes -split ' '
ForEach($t in $l) {
$ExcludeExesSet.Add($t) | Out-Null
}
}
# Extract the names of its tests by running db_test with --gtest_list_tests.
# This filter removes the "#"-introduced comments, and expands to
# fully-qualified names by changing input like this:
#
# DBTest.
# Empty
# WriteEmptyBatch
# MultiThreaded/MultiThreadedDBTest.
# MultiThreaded/0 # GetParam() = 0
# MultiThreaded/1 # GetParam() = 1
# RibbonTypeParamTest/0. # TypeParam = struct DefaultTypesAndSettings
# CompactnessAndBacktrackAndFpRate
# Extremes
# FindOccupancyForSuccessRate
#
# into this:
#
# DBTest.Empty
# DBTest.WriteEmptyBatch
# MultiThreaded/MultiThreadedDBTest.MultiThreaded/0
# MultiThreaded/MultiThreadedDBTest.MultiThreaded/1
# RibbonTypeParamTest/0.CompactnessAndBacktrackAndFpRate
# RibbonTypeParamTest/0.Extremes
# RibbonTypeParamTest/0.FindOccupancyForSuccessRate
#
# Output into the parameter in a form TestName -> Log File Name
function ExtractTestCases([string]$GTestExe, $HashTable) {
$Tests = @()
# Run db_test to get a list of tests and store it into $a array
&$GTestExe --gtest_list_tests | tee -Variable Tests | Out-Null
# Current group
$Group=""
ForEach( $l in $Tests) {
# remove trailing comment if any
$l = $l -replace '\s+\#.*',''
# Leading whitespace is fine
$l = $l -replace '^\s+',''
# Trailing dot is a test group but no whitespace
if ($l -match "\.$" -and $l -notmatch "\s+") {
$Group = $l
} else {
# Otherwise it is a test name, remove leading space
$test = $l
# create a log name
$test = "$Group$test"
if($ExcludeCasesSet.Contains($test)) {
Write-Warning "$test case is excluded"
continue
}
$test_log = $test -replace '[\./]','_'
$test_log += ".log"
$log_path = -join ($LogFolder, $test_log)
# Add to a hashtable
$HashTable.Add($test, $log_path);
}
}
}
# The function removes trailing .exe siffix if any,
# creates a name for the log file
# Then adds the test name if it was not excluded into
# a HashTable in a form of test_name -> log_path
function MakeAndAdd([string]$token, $HashTable) {
$test_name = $token -replace '.exe$', ''
$log_name = -join ($test_name, ".log")
$log_path = -join ($LogFolder, $log_name)
$HashTable.Add($test_name, $log_path)
}
# This function takes a list of Suites to run
# Lists all the test cases in each of the suite
# and populates HashOfHashes
# Ordered by suite(exe) @{ Exe = @{ TestCase = LogName }}
function ProcessSuites($ListOfSuites, $HashOfHashes) {
$suite_list = $ListOfSuites
# Problem: if you run --gtest_list_tests on
# a non Google Test executable then it will start executing
# and we will get nowhere
ForEach($suite in $suite_list) {
if($RunOnly.Contains($suite)) {
Write-Warning "$suite is excluded from running as Google test suite"
continue
}
if($EnableJE) {
$suite += "_je"
}
$Cases = [ordered]@{}
$Cases.Clear()
$suite_exe = -Join ($BinariesFolder, $suite)
ExtractTestCases -GTestExe $suite_exe -HashTable $Cases
if($Cases.Count -gt 0) {
$HashOfHashes.Add($suite, $Cases);
}
}
# Make logs and run
if($CasesToRun.Count -lt 1) {
Write-Error "Failed to extract tests from $SuiteRun"
exit 1
}
}
# This will contain all test executables to run
# Hash table that contains all non suite
# Test executable to run
$TestExes = [ordered]@{}
# Check for test exe that are not
# Google Test Suites
# Since this is explicitely mentioned it is not subject
# for exclusions
if($Run -ne "") {
$test_list = $Run -split ' '
ForEach($t in $test_list) {
if($EnableJE) {
$t += "_je"
}
MakeAndAdd -token $t -HashTable $TestExes
}
if($TestExes.Count -lt 1) {
Write-Error "Failed to extract tests from $Run"
exit 1
}
} elseif($RunAllExe) {
# Discover all the test binaries
if($EnableJE) {
$pattern = "*_test_je.exe"
} else {
$pattern = "*_test.exe"
}
$search_path = -join ($BinariesFolder, $pattern)
Write-Host "Binaries Search Path: $search_path"
$DiscoveredExe = @()
dir -Path $search_path | ForEach-Object {
$DiscoveredExe += ($_.Name)
}
# Remove exclusions
ForEach($e in $DiscoveredExe) {
$e = $e -replace '.exe$', ''
$bare_name = $e -replace '_je$', ''
if($ExcludeExesSet.Contains($bare_name)) {
Write-Warning "Test $e is excluded"
continue
}
MakeAndAdd -token $e -HashTable $TestExes
}
if($TestExes.Count -lt 1) {
Write-Error "Failed to discover test executables"
exit 1
}
}
# Ordered by exe @{ Exe = @{ TestCase = LogName }}
$CasesToRun = [ordered]@{}
if($SuiteRun -ne "") {
$suite_list = $SuiteRun -split ' '
ProcessSuites -ListOfSuites $suite_list -HashOfHashes $CasesToRun
} elseif ($RunAll) {
# Discover all the test binaries
if($EnableJE) {
$pattern = "*_test_je.exe"
} else {
$pattern = "*_test.exe"
}
$search_path = -join ($BinariesFolder, $pattern)
Write-Host "Binaries Search Path: $search_path"
$ListOfExe = @()
dir -Path $search_path | ForEach-Object {
$ListOfExe += ($_.Name)
}
# Exclude those in RunOnly from running as suites
$ListOfSuites = @()
ForEach($e in $ListOfExe) {
$e = $e -replace '.exe$', ''
$bare_name = $e -replace '_je$', ''
if($ExcludeExesSet.Contains($bare_name)) {
Write-Warning "Test $e is excluded"
continue
}
if($RunOnly.Contains($bare_name)) {
MakeAndAdd -token $e -HashTable $TestExes
} else {
$ListOfSuites += $bare_name
}
}
ProcessSuites -ListOfSuites $ListOfSuites -HashOfHashes $CasesToRun
}
# Invoke a test with a filter and redirect all output
$InvokeTestCase = {
param($exe, $test, $log);
&$exe --gtest_filter=$test > $log 2>&1
}
# Invoke all tests and redirect output
$InvokeTestAsync = {
param($exe, $log)
&$exe > $log 2>&1
}
# Hash that contains tests to rerun if any failed
# Those tests will be rerun sequentially
# $Rerun = [ordered]@{}
# Test limiting factor here
[int]$count = 0
# Overall status
[bool]$script:success = $true;
function RunJobs($Suites, $TestCmds, [int]$ConcurrencyVal)
{
# Array to wait for any of the running jobs
$jobs = @()
# Hash JobToLog
$JobToLog = @{}
# Wait for all to finish and get the results
while(($JobToLog.Count -gt 0) -or
($TestCmds.Count -gt 0) -or
($Suites.Count -gt 0)) {
# Make sure we have maximum concurrent jobs running if anything
# and the $Limit either not set or allows to proceed
while(($JobToLog.Count -lt $ConcurrencyVal) -and
((($TestCmds.Count -gt 0) -or ($Suites.Count -gt 0)) -and
(($Limit -lt 0) -or ($count -lt $Limit)))) {
# We always favore suites to run if available
[string]$exe_name = ""
[string]$log_path = ""
$Cases = @{}
if($Suites.Count -gt 0) {
# Will the first one
ForEach($e in $Suites.Keys) {
$exe_name = $e
$Cases = $Suites[$e]
break
}
[string]$test_case = ""
[string]$log_path = ""
ForEach($c in $Cases.Keys) {
$test_case = $c
$log_path = $Cases[$c]
break
}
Write-Host "Starting $exe_name::$test_case"
[string]$Exe = -Join ($BinariesFolder, $exe_name)
$job = Start-Job -Name "$exe_name::$test_case" -ArgumentList @($Exe,$test_case,$log_path) -ScriptBlock $InvokeTestCase
$JobToLog.Add($job, $log_path)
$Cases.Remove($test_case)
if($Cases.Count -lt 1) {
$Suites.Remove($exe_name)
}
} elseif ($TestCmds.Count -gt 0) {
ForEach($e in $TestCmds.Keys) {
$exe_name = $e
$log_path = $TestCmds[$e]
break
}
Write-Host "Starting $exe_name"
[string]$Exe = -Join ($BinariesFolder, $exe_name)
$job = Start-Job -Name $exe_name -ScriptBlock $InvokeTestAsync -ArgumentList @($Exe,$log_path)
$JobToLog.Add($job, $log_path)
$TestCmds.Remove($exe_name)
} else {
Write-Error "In the job loop but nothing to run"
exit 1
}
++$count
} # End of Job starting loop
if($JobToLog.Count -lt 1) {
break
}
$jobs = @()
foreach($k in $JobToLog.Keys) { $jobs += $k }
$completed = Wait-Job -Job $jobs -Any
$log = $JobToLog[$completed]
$JobToLog.Remove($completed)
$message = -join @($completed.Name, " State: ", ($completed.State))
$log_content = @(Get-Content $log)
if($completed.State -ne "Completed") {
$script:success = $false
Write-Warning $message
$log_content | Write-Warning
} else {
# Scan the log. If we find PASSED and no occurrence of FAILED
# then it is a success
[bool]$pass_found = $false
ForEach($l in $log_content) {
if(($l -match "^\[\s+FAILED") -or
($l -match "Assertion failed:")) {
$pass_found = $false
break
}
if(($l -match "^\[\s+PASSED") -or
($l -match " : PASSED$") -or
($l -match "^PASS$") -or # Special c_test case
($l -match "Passed all tests!") ) {
$pass_found = $true
}
}
if(!$pass_found) {
$script:success = $false;
Write-Warning $message
$log_content | Write-Warning
} else {
Write-Host $message
}
}
# Remove cached job info from the system
# Should be no output
Receive-Job -Job $completed | Out-Null
}
}
RunJobs -Suites $CasesToRun -TestCmds $TestExes -ConcurrencyVal $Concurrency
$EndDate = (Get-Date)
New-TimeSpan -Start $StartDate -End $EndDate |
ForEach-Object {
"Elapsed time: {0:g}" -f $_
}
if(!$script:success) {
# This does not succeed killing off jobs quick
# So we simply exit
# Remove-Job -Job $jobs -Force
# indicate failure using this exit code
exit 1
}
exit 0

View File

@ -1,45 +0,0 @@
#!/bin/bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
set -ex
ROCKSDB_VERSION="6.7.3"
ZSTD_VERSION="1.4.4"
echo "This script configures CentOS with everything needed to build and run RocksDB"
yum update -y && yum install epel-release -y
yum install -y \
wget \
gcc-c++ \
snappy snappy-devel \
zlib zlib-devel \
bzip2 bzip2-devel \
lz4-devel \
libasan \
gflags
mkdir -pv /usr/local/rocksdb-${ROCKSDB_VERSION}
ln -sfT /usr/local/rocksdb-${ROCKSDB_VERSION} /usr/local/rocksdb
wget -qO /tmp/zstd-${ZSTD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz
wget -qO /tmp/rocksdb-${ROCKSDB_VERSION}.tar.gz https://github.com/facebook/rocksdb/archive/v${ROCKSDB_VERSION}.tar.gz
cd /tmp
tar xzvf zstd-${ZSTD_VERSION}.tar.gz
tar xzvf rocksdb-${ROCKSDB_VERSION}.tar.gz -C /usr/local/
echo "Installing ZSTD..."
pushd zstd-${ZSTD_VERSION}
make && make install
popd
echo "Compiling RocksDB..."
cd /usr/local/rocksdb
chown -R vagrant:vagrant /usr/local/rocksdb/
sudo -u vagrant make static_lib
cd examples/
sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ make all
sudo -u vagrant LD_LIBRARY_PATH=/usr/local/lib/ ./c_simple_example

78
build_tools/unity Executable file
View File

@ -0,0 +1,78 @@
#!/bin/sh
#
# Create the unity file
#
OUTPUT=$1
if test -z "$OUTPUT"; then
echo "usage: $0 <output-filename>" >&2
exit 1
fi
# Delete existing file, if it exists
rm -f "$OUTPUT"
touch "$OUTPUT"
# Detect OS
if test -z "$TARGET_OS"; then
TARGET_OS=`uname -s`
fi
# generic port files (working on all platform by #ifdef) go directly in /port
GENERIC_PORT_FILES=`cd "$ROCKSDB_ROOT"; find port -name '*.cc' | tr "\n" " "`
# On GCC, we pick libc's memcmp over GCC's memcmp via -fno-builtin-memcmp
case "$TARGET_OS" in
Darwin)
# PORT_FILES=port/darwin/darwin_specific.cc
;;
IOS)
;;
Linux)
# PORT_FILES=port/linux/linux_specific.cc
;;
SunOS)
# PORT_FILES=port/sunos/sunos_specific.cc
;;
FreeBSD)
# PORT_FILES=port/freebsd/freebsd_specific.cc
;;
NetBSD)
# PORT_FILES=port/netbsd/netbsd_specific.cc
;;
OpenBSD)
# PORT_FILES=port/openbsd/openbsd_specific.cc
;;
DragonFly)
# PORT_FILES=port/dragonfly/dragonfly_specific.cc
;;
OS_ANDROID_CROSSCOMPILE)
# PORT_FILES=port/android/android.cc
;;
*)
echo "Unknown platform!" >&2
exit 1
esac
# We want to make a list of all cc files within util, db, table, and helpers
# except for the test and benchmark files. By default, find will output a list
# of all files matching either rule, so we need to append -print to make the
# prune take effect.
DIRS="util db table utilities"
set -f # temporarily disable globbing so that our patterns arent expanded
PRUNE_TEST="-name *test*.cc -prune"
PRUNE_BENCH="-name *bench*.cc -prune"
PORTABLE_FILES=`cd "$ROCKSDB_ROOT"; find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cc' -print | sort`
PORTABLE_CPP=`cd "$ROCKSDB_ROOT"; find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cpp' -print | sort`
set +f # re-enable globbing
# The sources consist of the portable files, plus the platform-specific port
# file.
for SOURCE_FILE in $PORTABLE_FILES $GENERIC_PORT_FILES $PORT_FILES $PORTABLE_CPP
do
echo "#include <$SOURCE_FILE>" >> "$OUTPUT"
done
echo "int main(int argc, char** argv){ return 0; }" >> "$OUTPUT"

View File

@ -1,149 +0,0 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# Update dependencies.sh file with the latest avaliable versions
BASEDIR=$(dirname $0)
OUTPUT=""
function log_header()
{
echo "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved." >> "$OUTPUT"
echo "# The file is generated using update_dependencies.sh." >> "$OUTPUT"
}
function log_variable()
{
echo "$1=${!1}" >> "$OUTPUT"
}
TP2_LATEST="/data/users/$USER/fbsource/fbcode/third-party2/"
## $1 => lib name
## $2 => lib version (if not provided, will try to pick latest)
## $3 => platform (if not provided, will try to pick latest gcc)
##
## get_lib_base will set a variable named ${LIB_NAME}_BASE to the lib location
function get_lib_base()
{
local lib_name=$1
local lib_version=$2
local lib_platform=$3
local result="$TP2_LATEST/$lib_name/"
# Lib Version
if [ -z "$lib_version" ] || [ "$lib_version" = "LATEST" ]; then
# version is not provided, use latest
result=`ls -dr1v $result/*/ | head -n1`
else
result="$result/$lib_version/"
fi
# Lib Platform
if [ -z "$lib_platform" ]; then
# platform is not provided, use latest gcc
result=`ls -dr1v $result/gcc-*[^fb]/ | head -n1`
else
echo $lib_platform
result="$result/$lib_platform/"
fi
result=`ls -1d $result/*/ | head -n1`
echo Finding link $result
# lib_name => LIB_NAME_BASE
local __res_var=${lib_name^^}"_BASE"
__res_var=`echo $__res_var | tr - _`
# LIB_NAME_BASE=$result
eval $__res_var=`readlink -f $result`
log_variable $__res_var
}
###########################################################
# platform010 dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies_platform010.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
echo "Writing dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/11.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/12/platform010/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 11.x platform010
get_lib_base glibc 2.34 platform010
get_lib_base snappy LATEST platform010
get_lib_base zlib LATEST platform010
get_lib_base bzip2 LATEST platform010
get_lib_base lz4 LATEST platform010
get_lib_base zstd LATEST platform010
get_lib_base gflags LATEST platform010
get_lib_base jemalloc LATEST platform010
get_lib_base numa LATEST platform010
get_lib_base libunwind LATEST platform010
get_lib_base tbb 2018_U5 platform010
get_lib_base liburing LATEST platform010
get_lib_base benchmark LATEST platform010
get_lib_base kernel-headers fb platform010
get_lib_base binutils LATEST centos7-native
get_lib_base valgrind LATEST platform010
get_lib_base lua 5.3.4 platform010
git diff $OUTPUT
###########################################################
# platform009 dependencies #
###########################################################
OUTPUT="$BASEDIR/dependencies_platform009.sh"
rm -f "$OUTPUT"
touch "$OUTPUT"
echo "Writing dependencies to $OUTPUT"
# Compilers locations
GCC_BASE=`readlink -f $TP2_LATEST/gcc/9.x/centos7-native/*/`
CLANG_BASE=`readlink -f $TP2_LATEST/llvm-fb/9.0.0/platform009/*/`
log_header
log_variable GCC_BASE
log_variable CLANG_BASE
# Libraries locations
get_lib_base libgcc 9.x platform009
get_lib_base glibc 2.30 platform009
get_lib_base snappy LATEST platform009
get_lib_base zlib LATEST platform009
get_lib_base bzip2 LATEST platform009
get_lib_base lz4 LATEST platform009
get_lib_base zstd LATEST platform009
get_lib_base gflags LATEST platform009
get_lib_base jemalloc LATEST platform009
get_lib_base numa LATEST platform009
get_lib_base libunwind LATEST platform009
get_lib_base tbb 2018_U5 platform009
get_lib_base liburing LATEST platform009
get_lib_base benchmark LATEST platform009
get_lib_base kernel-headers fb platform009
get_lib_base binutils LATEST centos7-native
get_lib_base valgrind LATEST platform009
get_lib_base lua 5.3.4 platform009
git diff $OUTPUT

15
build_tools/valgrind_test.sh Executable file
View File

@ -0,0 +1,15 @@
#!/bin/bash
#A shell script for Jenknis to run valgrind on rocksdb tests
#Returns 0 on success when there are no failed tests
VALGRIND_DIR=build_tools/VALGRIND_LOGS
make clean
make -j$(nproc) valgrind_check
NUM_FAILED_TESTS=$((`wc -l $VALGRIND_DIR/valgrind_failed_tests | awk '{print $1}'` - 1))
if [ $NUM_FAILED_TESTS -lt 1 ]; then
echo No tests have valgrind errors
exit 0
else
cat $VALGRIND_DIR/valgrind_failed_tests
exit 1
fi

View File

@ -1,10 +1,8 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#!/bin/sh
if [ "$#" = "0" ]; then
echo "Usage: $0 major|minor|patch|full"
echo "Usage: $0 major|minor|patch"
exit 1
fi
if [ "$1" = "major" ]; then
cat include/rocksdb/version.h | grep MAJOR | head -n1 | awk '{print $3}'
fi
@ -14,10 +12,3 @@ fi
if [ "$1" = "patch" ]; then
cat include/rocksdb/version.h | grep PATCH | head -n1 | awk '{print $3}'
fi
if [ "$1" = "full" ]; then
awk '/#define ROCKSDB/ { env[$2] = $3 }
END { printf "%s.%s.%s\n", env["ROCKSDB_MAJOR"],
env["ROCKSDB_MINOR"],
env["ROCKSDB_PATCH"] }' \
include/rocksdb/version.h
fi

72
cache/cache.cc vendored
View File

@ -1,72 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "rocksdb/cache.h"
#include "cache/lru_cache.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/utilities/customizable_util.h"
#include "rocksdb/utilities/options_type.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
#ifndef ROCKSDB_LITE
static std::unordered_map<std::string, OptionTypeInfo>
lru_cache_options_type_info = {
{"capacity",
{offsetof(struct LRUCacheOptions, capacity), OptionType::kSizeT,
OptionVerificationType::kNormal, OptionTypeFlags::kMutable}},
{"num_shard_bits",
{offsetof(struct LRUCacheOptions, num_shard_bits), OptionType::kInt,
OptionVerificationType::kNormal, OptionTypeFlags::kMutable}},
{"strict_capacity_limit",
{offsetof(struct LRUCacheOptions, strict_capacity_limit),
OptionType::kBoolean, OptionVerificationType::kNormal,
OptionTypeFlags::kMutable}},
{"high_pri_pool_ratio",
{offsetof(struct LRUCacheOptions, high_pri_pool_ratio),
OptionType::kDouble, OptionVerificationType::kNormal,
OptionTypeFlags::kMutable}},
};
#endif // ROCKSDB_LITE
Status SecondaryCache::CreateFromString(
const ConfigOptions& config_options, const std::string& value,
std::shared_ptr<SecondaryCache>* result) {
return LoadSharedObject<SecondaryCache>(config_options, value, nullptr,
result);
}
Status Cache::CreateFromString(const ConfigOptions& config_options,
const std::string& value,
std::shared_ptr<Cache>* result) {
Status status;
std::shared_ptr<Cache> cache;
if (value.find('=') == std::string::npos) {
cache = NewLRUCache(ParseSizeT(value));
} else {
#ifndef ROCKSDB_LITE
LRUCacheOptions cache_opts;
status = OptionTypeInfo::ParseStruct(config_options, "",
&lru_cache_options_type_info, "",
value, &cache_opts);
if (status.ok()) {
cache = NewLRUCache(cache_opts);
}
#else
(void)config_options;
status = Status::NotSupported("Cannot load cache in LITE mode ", value);
#endif //! ROCKSDB_LITE
}
if (status.ok()) {
result->swap(cache);
}
return status;
}
} // namespace ROCKSDB_NAMESPACE

20
cache/cache_bench.cc vendored
View File

@ -1,20 +0,0 @@
// Copyright (c) 2013-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef GFLAGS
#include <cstdio>
int main() {
fprintf(stderr, "Please install gflags to run rocksdb tools\n");
return 1;
}
#else
#include "rocksdb/cache_bench_tool.h"
int main(int argc, char** argv) {
return ROCKSDB_NAMESPACE::cache_bench_tool(argc, argv);
}
#endif // GFLAGS

View File

@ -1,924 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#ifdef GFLAGS
#include <cinttypes>
#include <cstddef>
#include <cstdio>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include "db/db_impl/db_impl.h"
#include "monitoring/histogram.h"
#include "port/port.h"
#include "rocksdb/cache.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table_properties.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/cachable_entry.h"
#include "util/coding.h"
#include "util/gflags_compat.h"
#include "util/hash.h"
#include "util/mutexlock.h"
#include "util/random.h"
#include "util/stop_watch.h"
#include "util/string_util.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
static constexpr uint32_t KiB = uint32_t{1} << 10;
static constexpr uint32_t MiB = KiB << 10;
static constexpr uint64_t GiB = MiB << 10;
DEFINE_uint32(threads, 16, "Number of concurrent threads to run.");
DEFINE_uint64(cache_size, 1 * GiB,
"Number of bytes to use as a cache of uncompressed data.");
DEFINE_uint32(num_shard_bits, 6, "shard_bits.");
DEFINE_double(resident_ratio, 0.25,
"Ratio of keys fitting in cache to keyspace.");
DEFINE_uint64(ops_per_thread, 2000000U, "Number of operations per thread.");
DEFINE_uint32(value_bytes, 8 * KiB, "Size of each value added.");
DEFINE_uint32(skew, 5, "Degree of skew in key selection");
DEFINE_bool(populate_cache, true, "Populate cache before operations");
DEFINE_uint32(lookup_insert_percent, 87,
"Ratio of lookup (+ insert on not found) to total workload "
"(expressed as a percentage)");
DEFINE_uint32(insert_percent, 2,
"Ratio of insert to total workload (expressed as a percentage)");
DEFINE_uint32(lookup_percent, 10,
"Ratio of lookup to total workload (expressed as a percentage)");
DEFINE_uint32(erase_percent, 1,
"Ratio of erase to total workload (expressed as a percentage)");
DEFINE_bool(gather_stats, false,
"Whether to periodically simulate gathering block cache stats, "
"using one more thread.");
DEFINE_uint32(
gather_stats_sleep_ms, 1000,
"How many milliseconds to sleep between each gathering of stats.");
DEFINE_uint32(gather_stats_entries_per_lock, 256,
"For Cache::ApplyToAllEntries");
DEFINE_bool(skewed, false, "If true, skew the key access distribution");
#ifndef ROCKSDB_LITE
DEFINE_string(secondary_cache_uri, "",
"Full URI for creating a custom secondary cache object");
static class std::shared_ptr<ROCKSDB_NAMESPACE::SecondaryCache> secondary_cache;
#endif // ROCKSDB_LITE
DEFINE_bool(use_clock_cache, false, "");
// ## BEGIN stress_cache_key sub-tool options ##
// See class StressCacheKey below.
DEFINE_bool(stress_cache_key, false,
"If true, run cache key stress test instead");
DEFINE_uint32(
sck_files_per_day, 2500000,
"(-stress_cache_key) Simulated files generated per simulated day");
// NOTE: Giving each run a specified lifetime, rather than e.g. "until
// first collision" ensures equal skew from start-up, when collisions are
// less likely.
DEFINE_uint32(sck_days_per_run, 90,
"(-stress_cache_key) Number of days to simulate in each run");
// NOTE: The number of observed collisions directly affects the relative
// accuracy of the predicted probabilities. 15 observations should be well
// within factor-of-2 accuracy.
DEFINE_uint32(
sck_min_collision, 15,
"(-stress_cache_key) Keep running until this many collisions seen");
// sck_file_size_mb can be thought of as average file size. The simulation is
// not precise enough to care about the distribution of file sizes; other
// simulations (https://github.com/pdillinger/unique_id/tree/main/monte_carlo)
// indicate the distribution only makes a small difference (e.g. < 2x factor)
DEFINE_uint32(
sck_file_size_mb, 32,
"(-stress_cache_key) Simulated file size in MiB, for accounting purposes");
DEFINE_uint32(sck_reopen_nfiles, 100,
"(-stress_cache_key) Simulate DB re-open average every n files");
DEFINE_uint32(sck_restarts_per_day, 24,
"(-stress_cache_key) Average simulated process restarts per day "
"(across DBs)");
DEFINE_uint32(
sck_db_count, 100,
"(-stress_cache_key) Parallel DBs in simulation sharing a block cache");
DEFINE_uint32(
sck_table_bits, 20,
"(-stress_cache_key) Log2 number of tracked (live) files (across DBs)");
// sck_keep_bits being well below full 128 bits amplifies the collision
// probability so that the true probability can be estimated through observed
// collisions. (More explanation below.)
DEFINE_uint32(
sck_keep_bits, 50,
"(-stress_cache_key) Number of bits to keep from each cache key (<= 64)");
// sck_randomize is used to validate whether cache key is performing "better
// than random." Even with this setting, file offsets are not randomized.
DEFINE_bool(sck_randomize, false,
"(-stress_cache_key) Randomize (hash) cache key");
// See https://github.com/facebook/rocksdb/pull/9058
DEFINE_bool(sck_footer_unique_id, false,
"(-stress_cache_key) Simulate using proposed footer unique id");
// ## END stress_cache_key sub-tool options ##
namespace ROCKSDB_NAMESPACE {
class CacheBench;
namespace {
// State shared by all concurrent executions of the same benchmark.
class SharedState {
public:
explicit SharedState(CacheBench* cache_bench)
: cv_(&mu_),
num_initialized_(0),
start_(false),
num_done_(0),
cache_bench_(cache_bench) {}
~SharedState() {}
port::Mutex* GetMutex() { return &mu_; }
port::CondVar* GetCondVar() { return &cv_; }
CacheBench* GetCacheBench() const { return cache_bench_; }
void IncInitialized() { num_initialized_++; }
void IncDone() { num_done_++; }
bool AllInitialized() const { return num_initialized_ >= FLAGS_threads; }
bool AllDone() const { return num_done_ >= FLAGS_threads; }
void SetStart() { start_ = true; }
bool Started() const { return start_; }
private:
port::Mutex mu_;
port::CondVar cv_;
uint64_t num_initialized_;
bool start_;
uint64_t num_done_;
CacheBench* cache_bench_;
};
// Per-thread state for concurrent executions of the same benchmark.
struct ThreadState {
uint32_t tid;
Random64 rnd;
SharedState* shared;
HistogramImpl latency_ns_hist;
uint64_t duration_us = 0;
ThreadState(uint32_t index, SharedState* _shared)
: tid(index), rnd(1000 + index), shared(_shared) {}
};
struct KeyGen {
char key_data[27];
Slice GetRand(Random64& rnd, uint64_t max_key, int max_log) {
uint64_t key = 0;
if (!FLAGS_skewed) {
uint64_t raw = rnd.Next();
// Skew according to setting
for (uint32_t i = 0; i < FLAGS_skew; ++i) {
raw = std::min(raw, rnd.Next());
}
key = FastRange64(raw, max_key);
} else {
key = rnd.Skewed(max_log);
if (key > max_key) {
key -= max_key;
}
}
// Variable size and alignment
size_t off = key % 8;
key_data[0] = char{42};
EncodeFixed64(key_data + 1, key);
key_data[9] = char{11};
EncodeFixed64(key_data + 10, key);
key_data[18] = char{4};
EncodeFixed64(key_data + 19, key);
return Slice(&key_data[off], sizeof(key_data) - off);
}
};
char* createValue(Random64& rnd) {
char* rv = new char[FLAGS_value_bytes];
// Fill with some filler data, and take some CPU time
for (uint32_t i = 0; i < FLAGS_value_bytes; i += 8) {
EncodeFixed64(rv + i, rnd.Next());
}
return rv;
}
// Callbacks for secondary cache
size_t SizeFn(void* /*obj*/) { return FLAGS_value_bytes; }
Status SaveToFn(void* obj, size_t /*offset*/, size_t size, void* out) {
memcpy(out, obj, size);
return Status::OK();
}
// Different deleters to simulate using deleter to gather
// stats on the code origin and kind of cache entries.
void deleter1(const Slice& /*key*/, void* value) {
delete[] static_cast<char*>(value);
}
void deleter2(const Slice& /*key*/, void* value) {
delete[] static_cast<char*>(value);
}
void deleter3(const Slice& /*key*/, void* value) {
delete[] static_cast<char*>(value);
}
Cache::CacheItemHelper helper1(SizeFn, SaveToFn, deleter1);
Cache::CacheItemHelper helper2(SizeFn, SaveToFn, deleter2);
Cache::CacheItemHelper helper3(SizeFn, SaveToFn, deleter3);
} // namespace
class CacheBench {
static constexpr uint64_t kHundredthUint64 =
std::numeric_limits<uint64_t>::max() / 100U;
public:
CacheBench()
: max_key_(static_cast<uint64_t>(FLAGS_cache_size / FLAGS_resident_ratio /
FLAGS_value_bytes)),
lookup_insert_threshold_(kHundredthUint64 *
FLAGS_lookup_insert_percent),
insert_threshold_(lookup_insert_threshold_ +
kHundredthUint64 * FLAGS_insert_percent),
lookup_threshold_(insert_threshold_ +
kHundredthUint64 * FLAGS_lookup_percent),
erase_threshold_(lookup_threshold_ +
kHundredthUint64 * FLAGS_erase_percent),
skewed_(FLAGS_skewed) {
if (erase_threshold_ != 100U * kHundredthUint64) {
fprintf(stderr, "Percentages must add to 100.\n");
exit(1);
}
max_log_ = 0;
if (skewed_) {
uint64_t max_key = max_key_;
while (max_key >>= 1) max_log_++;
if (max_key > (static_cast<uint64_t>(1) << max_log_)) max_log_++;
}
if (FLAGS_use_clock_cache) {
cache_ = NewClockCache(FLAGS_cache_size, FLAGS_num_shard_bits);
if (!cache_) {
fprintf(stderr, "Clock cache not supported.\n");
exit(1);
}
} else {
LRUCacheOptions opts(FLAGS_cache_size, FLAGS_num_shard_bits, false, 0.5);
#ifndef ROCKSDB_LITE
if (!FLAGS_secondary_cache_uri.empty()) {
Status s = SecondaryCache::CreateFromString(
ConfigOptions(), FLAGS_secondary_cache_uri, &secondary_cache);
if (secondary_cache == nullptr) {
fprintf(
stderr,
"No secondary cache registered matching string: %s status=%s\n",
FLAGS_secondary_cache_uri.c_str(), s.ToString().c_str());
exit(1);
}
opts.secondary_cache = secondary_cache;
}
#endif // ROCKSDB_LITE
cache_ = NewLRUCache(opts);
}
}
~CacheBench() {}
void PopulateCache() {
Random64 rnd(1);
KeyGen keygen;
for (uint64_t i = 0; i < 2 * FLAGS_cache_size; i += FLAGS_value_bytes) {
cache_->Insert(keygen.GetRand(rnd, max_key_, max_log_), createValue(rnd),
&helper1, FLAGS_value_bytes);
}
}
bool Run() {
const auto clock = SystemClock::Default().get();
PrintEnv();
SharedState shared(this);
std::vector<std::unique_ptr<ThreadState> > threads(FLAGS_threads);
for (uint32_t i = 0; i < FLAGS_threads; i++) {
threads[i].reset(new ThreadState(i, &shared));
std::thread(ThreadBody, threads[i].get()).detach();
}
HistogramImpl stats_hist;
std::string stats_report;
std::thread stats_thread(StatsBody, &shared, &stats_hist, &stats_report);
uint64_t start_time;
{
MutexLock l(shared.GetMutex());
while (!shared.AllInitialized()) {
shared.GetCondVar()->Wait();
}
// Record start time
start_time = clock->NowMicros();
// Start all threads
shared.SetStart();
shared.GetCondVar()->SignalAll();
// Wait threads to complete
while (!shared.AllDone()) {
shared.GetCondVar()->Wait();
}
}
// Stats gathering is considered background work. This time measurement
// is for foreground work, and not really ideal for that. See below.
uint64_t end_time = clock->NowMicros();
stats_thread.join();
// Wall clock time - includes idle time if threads
// finish at different times (not ideal).
double elapsed_secs = static_cast<double>(end_time - start_time) * 1e-6;
uint32_t ops_per_sec = static_cast<uint32_t>(
1.0 * FLAGS_threads * FLAGS_ops_per_thread / elapsed_secs);
printf("Complete in %.3f s; Rough parallel ops/sec = %u\n", elapsed_secs,
ops_per_sec);
// Total time in each thread (more accurate throughput measure)
elapsed_secs = 0;
for (uint32_t i = 0; i < FLAGS_threads; i++) {
elapsed_secs += threads[i]->duration_us * 1e-6;
}
ops_per_sec = static_cast<uint32_t>(1.0 * FLAGS_threads *
FLAGS_ops_per_thread / elapsed_secs);
printf("Thread ops/sec = %u\n", ops_per_sec);
printf("\nOperation latency (ns):\n");
HistogramImpl combined;
for (uint32_t i = 0; i < FLAGS_threads; i++) {
combined.Merge(threads[i]->latency_ns_hist);
}
printf("%s", combined.ToString().c_str());
if (FLAGS_gather_stats) {
printf("\nGather stats latency (us):\n");
printf("%s", stats_hist.ToString().c_str());
}
printf("\n%s", stats_report.c_str());
return true;
}
private:
std::shared_ptr<Cache> cache_;
const uint64_t max_key_;
// Cumulative thresholds in the space of a random uint64_t
const uint64_t lookup_insert_threshold_;
const uint64_t insert_threshold_;
const uint64_t lookup_threshold_;
const uint64_t erase_threshold_;
const bool skewed_;
int max_log_;
// A benchmark version of gathering stats on an active block cache by
// iterating over it. The primary purpose is to measure the impact of
// gathering stats with ApplyToAllEntries on throughput- and
// latency-sensitive Cache users. Performance of stats gathering is
// also reported. The last set of gathered stats is also reported, for
// manual sanity checking for logical errors or other unexpected
// behavior of cache_bench or the underlying Cache.
static void StatsBody(SharedState* shared, HistogramImpl* stats_hist,
std::string* stats_report) {
if (!FLAGS_gather_stats) {
return;
}
const auto clock = SystemClock::Default().get();
uint64_t total_key_size = 0;
uint64_t total_charge = 0;
uint64_t total_entry_count = 0;
std::set<Cache::DeleterFn> deleters;
StopWatchNano timer(clock);
for (;;) {
uint64_t time;
time = clock->NowMicros();
uint64_t deadline = time + uint64_t{FLAGS_gather_stats_sleep_ms} * 1000;
{
MutexLock l(shared->GetMutex());
for (;;) {
if (shared->AllDone()) {
std::ostringstream ostr;
ostr << "Most recent cache entry stats:\n"
<< "Number of entries: " << total_entry_count << "\n"
<< "Total charge: " << BytesToHumanString(total_charge) << "\n"
<< "Average key size: "
<< (1.0 * total_key_size / total_entry_count) << "\n"
<< "Average charge: "
<< BytesToHumanString(static_cast<uint64_t>(
1.0 * total_charge / total_entry_count))
<< "\n"
<< "Unique deleters: " << deleters.size() << "\n";
*stats_report = ostr.str();
return;
}
if (clock->NowMicros() >= deadline) {
break;
}
uint64_t diff = deadline - std::min(clock->NowMicros(), deadline);
shared->GetCondVar()->TimedWait(diff + 1);
}
}
// Now gather stats, outside of mutex
total_key_size = 0;
total_charge = 0;
total_entry_count = 0;
deleters.clear();
auto fn = [&](const Slice& key, void* /*value*/, size_t charge,
Cache::DeleterFn deleter) {
total_key_size += key.size();
total_charge += charge;
++total_entry_count;
// Something slightly more expensive as in (future) stats by category
deleters.insert(deleter);
};
timer.Start();
Cache::ApplyToAllEntriesOptions opts;
opts.average_entries_per_lock = FLAGS_gather_stats_entries_per_lock;
shared->GetCacheBench()->cache_->ApplyToAllEntries(fn, opts);
stats_hist->Add(timer.ElapsedNanos() / 1000);
}
}
static void ThreadBody(ThreadState* thread) {
SharedState* shared = thread->shared;
{
MutexLock l(shared->GetMutex());
shared->IncInitialized();
if (shared->AllInitialized()) {
shared->GetCondVar()->SignalAll();
}
while (!shared->Started()) {
shared->GetCondVar()->Wait();
}
}
thread->shared->GetCacheBench()->OperateCache(thread);
{
MutexLock l(shared->GetMutex());
shared->IncDone();
if (shared->AllDone()) {
shared->GetCondVar()->SignalAll();
}
}
}
void OperateCache(ThreadState* thread) {
// To use looked-up values
uint64_t result = 0;
// To hold handles for a non-trivial amount of time
Cache::Handle* handle = nullptr;
KeyGen gen;
const auto clock = SystemClock::Default().get();
uint64_t start_time = clock->NowMicros();
StopWatchNano timer(clock);
for (uint64_t i = 0; i < FLAGS_ops_per_thread; i++) {
timer.Start();
Slice key = gen.GetRand(thread->rnd, max_key_, max_log_);
uint64_t random_op = thread->rnd.Next();
Cache::CreateCallback create_cb = [](const void* buf, size_t size,
void** out_obj,
size_t* charge) -> Status {
*out_obj = reinterpret_cast<void*>(new char[size]);
memcpy(*out_obj, buf, size);
*charge = size;
return Status::OK();
};
if (random_op < lookup_insert_threshold_) {
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// do lookup
handle = cache_->Lookup(key, &helper2, create_cb, Cache::Priority::LOW,
true);
if (handle) {
// do something with the data
result += NPHash64(static_cast<char*>(cache_->Value(handle)),
FLAGS_value_bytes);
} else {
// do insert
cache_->Insert(key, createValue(thread->rnd), &helper2,
FLAGS_value_bytes, &handle);
}
} else if (random_op < insert_threshold_) {
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// do insert
cache_->Insert(key, createValue(thread->rnd), &helper3,
FLAGS_value_bytes, &handle);
} else if (random_op < lookup_threshold_) {
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// do lookup
handle = cache_->Lookup(key, &helper2, create_cb, Cache::Priority::LOW,
true);
if (handle) {
// do something with the data
result += NPHash64(static_cast<char*>(cache_->Value(handle)),
FLAGS_value_bytes);
}
} else if (random_op < erase_threshold_) {
// do erase
cache_->Erase(key);
} else {
// Should be extremely unlikely (noop)
assert(random_op >= kHundredthUint64 * 100U);
}
thread->latency_ns_hist.Add(timer.ElapsedNanos());
}
if (handle) {
cache_->Release(handle);
handle = nullptr;
}
// Ensure computations on `result` are not optimized away.
if (result == 1) {
printf("You are extremely unlucky(2). Try again.\n");
exit(1);
}
thread->duration_us = clock->NowMicros() - start_time;
}
void PrintEnv() const {
printf("RocksDB version : %d.%d\n", kMajorVersion, kMinorVersion);
printf("Number of threads : %u\n", FLAGS_threads);
printf("Ops per thread : %" PRIu64 "\n", FLAGS_ops_per_thread);
printf("Cache size : %s\n",
BytesToHumanString(FLAGS_cache_size).c_str());
printf("Num shard bits : %u\n", FLAGS_num_shard_bits);
printf("Max key : %" PRIu64 "\n", max_key_);
printf("Resident ratio : %g\n", FLAGS_resident_ratio);
printf("Skew degree : %u\n", FLAGS_skew);
printf("Populate cache : %d\n", int{FLAGS_populate_cache});
printf("Lookup+Insert pct : %u%%\n", FLAGS_lookup_insert_percent);
printf("Insert percentage : %u%%\n", FLAGS_insert_percent);
printf("Lookup percentage : %u%%\n", FLAGS_lookup_percent);
printf("Erase percentage : %u%%\n", FLAGS_erase_percent);
std::ostringstream stats;
if (FLAGS_gather_stats) {
stats << "enabled (" << FLAGS_gather_stats_sleep_ms << "ms, "
<< FLAGS_gather_stats_entries_per_lock << "/lock)";
} else {
stats << "disabled";
}
printf("Gather stats : %s\n", stats.str().c_str());
printf("----------------------------\n");
}
};
// cache_bench -stress_cache_key is an independent embedded tool for
// estimating the probability of CacheKey collisions through simulation.
// At a high level, it simulates generating SST files over many months,
// keeping them in the DB and/or cache for some lifetime while staying
// under resource caps, and checking for any cache key collisions that
// arise among the set of live files. For efficient simulation, we make
// some simplifying "pessimistic" assumptions (that only increase the
// chance of the simulation reporting a collision relative to the chance
// of collision in practice):
// * Every generated file has a cache entry for every byte offset in the
// file (contiguous range of cache keys)
// * All of every file is cached for its entire lifetime. (Here "lifetime"
// is technically the union of DB and Cache lifetime, though we only
// model a generous DB lifetime, where space usage is always maximized.
// In a effective Cache, lifetime in cache can only substantially exceed
// lifetime in DB if there is little cache activity; cache activity is
// required to hit cache key collisions.)
//
// It would be possible to track an exact set of cache key ranges for the
// set of live files, but we would have no hope of observing collisions
// (overlap in live files) in our simulation. We need to employ some way
// of amplifying collision probability that allows us to predict the real
// collision probability by extrapolation from observed collisions. Our
// basic approach is to reduce each cache key range down to some smaller
// number of bits, and limiting to bits that are shared over the whole
// range. Now we can observe collisions using a set of smaller stripped-down
// (reduced) cache keys. Let's do some case analysis to understand why this
// works:
// * No collision in reduced key - because the reduction is a pure function
// this implies no collision in the full keys
// * Collision detected between two reduced keys - either
// * The reduction has dropped some structured uniqueness info (from one of
// session counter or file number; file offsets are never materialized here).
// This can only artificially inflate the observed and extrapolated collision
// probabilities. We only have to worry about this in designing the reduction.
// * The reduction has preserved all the structured uniqueness in the cache
// key, which means either
// * REJECTED: We have a uniqueness bug in generating cache keys, where
// structured uniqueness info should have been different but isn't. In such a
// case, increasing by 1 the number of bits kept after reduction would not
// reduce observed probabilities by half. (In our observations, the
// probabilities are reduced approximately by half.)
// * ACCEPTED: The lost unstructured uniqueness in the key determines the
// probability that an observed collision would imply an overlap in ranges.
// In short, dropping n bits from key would increase collision probability by
// 2**n, assuming those n bits have full entropy in unstructured uniqueness.
//
// But we also have to account for the key ranges based on file size. If file
// sizes are roughly 2**b offsets, using XOR in 128-bit cache keys for
// "ranges", we know from other simulations (see
// https://github.com/pdillinger/unique_id/) that that's roughly equivalent to
// (less than 2x higher collision probability) using a cache key of size
// 128 - b bits for the whole file. (This is the only place we make an
// "optimistic" assumption, which is more than offset by the real
// implementation stripping off 2 lower bits from block byte offsets for cache
// keys. The simulation assumes byte offsets, which is net pessimistic.)
//
// So to accept the extrapolation as valid, we need to be confident that all
// "lost" bits, excluding those covered by file offset, are full entropy.
// Recall that we have assumed (verifiably, safely) that other structured data
// (file number and session counter) are kept, not lost. Based on the
// implementation comments for OffsetableCacheKey, the only potential hole here
// is that we only have ~103 bits of entropy in "all new" session IDs, and in
// extreme cases, there might be only 1 DB ID. However, because the upper ~39
// bits of session ID are hashed, the combination of file number and file
// offset only has to add to 25 bits (or more) to ensure full entropy in
// unstructured uniqueness lost in the reduction. Typical file size of 32MB
// suffices (at least for simulation purposes where we assume each file offset
// occupies a cache key).
//
// Example results in comments on OffsetableCacheKey.
class StressCacheKey {
public:
void Run() {
if (FLAGS_sck_footer_unique_id) {
// Proposed footer unique IDs are DB-independent and session-independent
// (but process-dependent) which is most easily simulated here by
// assuming 1 DB and (later below) no session resets without process
// reset.
FLAGS_sck_db_count = 1;
}
// Describe the simulated workload
uint64_t mb_per_day =
uint64_t{FLAGS_sck_files_per_day} * FLAGS_sck_file_size_mb;
printf("Total cache or DBs size: %gTiB Writing %g MiB/s or %gTiB/day\n",
FLAGS_sck_file_size_mb / 1024.0 / 1024.0 *
std::pow(2.0, FLAGS_sck_table_bits),
mb_per_day / 86400.0, mb_per_day / 1024.0 / 1024.0);
// For extrapolating probability of any collisions from a number of
// observed collisions
multiplier_ = std::pow(2.0, 128 - FLAGS_sck_keep_bits) /
(FLAGS_sck_file_size_mb * 1024.0 * 1024.0);
printf(
"Multiply by %g to correct for simulation losses (but still assume "
"whole file cached)\n",
multiplier_);
restart_nfiles_ = FLAGS_sck_files_per_day / FLAGS_sck_restarts_per_day;
double without_ejection =
std::pow(1.414214, FLAGS_sck_keep_bits) / FLAGS_sck_files_per_day;
// This should be a lower bound for -sck_randomize, usually a terribly
// rough lower bound.
// If observation is worse than this, then something has gone wrong.
printf(
"Without ejection, expect random collision after %g days (%g "
"corrected)\n",
without_ejection, without_ejection * multiplier_);
double with_full_table =
std::pow(2.0, FLAGS_sck_keep_bits - FLAGS_sck_table_bits) /
FLAGS_sck_files_per_day;
// This is an alternate lower bound for -sck_randomize, usually pretty
// accurate. Our cache keys should usually perform "better than random"
// but always no worse. (If observation is substantially worse than this,
// then something has gone wrong.)
printf(
"With ejection and full table, expect random collision after %g "
"days (%g corrected)\n",
with_full_table, with_full_table * multiplier_);
collisions_ = 0;
// Run until sufficient number of observed collisions.
for (int i = 1; collisions_ < FLAGS_sck_min_collision; i++) {
RunOnce();
if (collisions_ == 0) {
printf(
"No collisions after %d x %u days "
" \n",
i, FLAGS_sck_days_per_run);
} else {
double est = 1.0 * i * FLAGS_sck_days_per_run / collisions_;
printf("%" PRIu64
" collisions after %d x %u days, est %g days between (%g "
"corrected) \n",
collisions_, i, FLAGS_sck_days_per_run, est, est * multiplier_);
}
}
}
void RunOnce() {
// Re-initialized simulated state
const size_t db_count = FLAGS_sck_db_count;
dbs_.reset(new TableProperties[db_count]{});
const size_t table_mask = (size_t{1} << FLAGS_sck_table_bits) - 1;
table_.reset(new uint64_t[table_mask + 1]{});
if (FLAGS_sck_keep_bits > 64) {
FLAGS_sck_keep_bits = 64;
}
// Details of which bits are dropped in reduction
uint32_t shift_away = 64 - FLAGS_sck_keep_bits;
// Shift away fewer potential file number bits (b) than potential
// session counter bits (a).
uint32_t shift_away_b = shift_away / 3;
uint32_t shift_away_a = shift_away - shift_away_b;
process_count_ = 0;
session_count_ = 0;
ResetProcess();
Random64 r{std::random_device{}()};
uint64_t max_file_count =
uint64_t{FLAGS_sck_files_per_day} * FLAGS_sck_days_per_run;
uint64_t file_size = FLAGS_sck_file_size_mb * uint64_t{1024} * 1024U;
uint32_t report_count = 0;
uint32_t collisions_this_run = 0;
size_t db_i = 0;
for (uint64_t file_count = 1; file_count <= max_file_count;
++file_count, ++db_i) {
// Round-robin through DBs (this faster than %)
if (db_i >= db_count) {
db_i = 0;
}
// Any other periodic actions before simulating next file
if (!FLAGS_sck_footer_unique_id && r.OneIn(FLAGS_sck_reopen_nfiles)) {
ResetSession(db_i);
} else if (r.OneIn(restart_nfiles_)) {
ResetProcess();
}
// Simulate next file
OffsetableCacheKey ock;
dbs_[db_i].orig_file_number += 1;
// skip some file numbers for other file kinds, except in footer unique
// ID, orig_file_number here tracks process-wide generated SST file
// count.
if (!FLAGS_sck_footer_unique_id) {
dbs_[db_i].orig_file_number += (r.Next() & 3);
}
bool is_stable;
BlockBasedTable::SetupBaseCacheKey(&dbs_[db_i], /* ignored */ "",
/* ignored */ 42, file_size, &ock,
&is_stable);
assert(is_stable);
// Get a representative cache key, which later we analytically generalize
// to a range.
CacheKey ck = ock.WithOffset(0);
uint64_t reduced_key;
if (FLAGS_sck_randomize) {
reduced_key = GetSliceHash64(ck.AsSlice()) >> shift_away;
} else if (FLAGS_sck_footer_unique_id) {
// Special case: keep only file number, not session counter
uint32_t a = DecodeFixed32(ck.AsSlice().data() + 4) >> shift_away_a;
uint32_t b = DecodeFixed32(ck.AsSlice().data() + 12) >> shift_away_b;
reduced_key = (uint64_t{a} << 32) + b;
} else {
// Try to keep file number and session counter (shift away other bits)
uint32_t a = DecodeFixed32(ck.AsSlice().data()) << shift_away_a;
uint32_t b = DecodeFixed32(ck.AsSlice().data() + 12) >> shift_away_b;
reduced_key = (uint64_t{a} << 32) + b;
}
if (reduced_key == 0) {
// Unlikely, but we need to exclude tracking this value because we
// use it to mean "empty" in table. This case is OK as long as we
// don't hit it often.
printf("Hit Zero! \n");
file_count--;
continue;
}
uint64_t h =
NPHash64(reinterpret_cast<char*>(&reduced_key), sizeof(reduced_key));
// Skew expected lifetimes, for high variance (super-Poisson) variance
// in actual lifetimes.
size_t pos =
std::min(Lower32of64(h) & table_mask, Upper32of64(h) & table_mask);
if (table_[pos] == reduced_key) {
collisions_this_run++;
// Our goal is to predict probability of no collisions, not expected
// number of collisions. To make the distinction, we have to get rid
// of observing correlated collisions, which this takes care of:
ResetProcess();
} else {
// Replace (end of lifetime for file that was in this slot)
table_[pos] = reduced_key;
}
if (++report_count == FLAGS_sck_files_per_day) {
report_count = 0;
// Estimate fill %
size_t incr = table_mask / 1000;
size_t sampled_count = 0;
for (size_t i = 0; i <= table_mask; i += incr) {
if (table_[i] != 0) {
sampled_count++;
}
}
// Report
printf(
"%" PRIu64 " days, %" PRIu64 " proc, %" PRIu64
" sess, %u coll, occ %g%%, ejected %g%% \r",
file_count / FLAGS_sck_files_per_day, process_count_,
session_count_, collisions_this_run, 100.0 * sampled_count / 1000.0,
100.0 * (1.0 - sampled_count / 1000.0 * table_mask / file_count));
fflush(stdout);
}
}
collisions_ += collisions_this_run;
}
void ResetSession(size_t i) {
dbs_[i].db_session_id = DBImpl::GenerateDbSessionId(nullptr);
session_count_++;
}
void ResetProcess() {
process_count_++;
DBImpl::TEST_ResetDbSessionIdGen();
for (size_t i = 0; i < FLAGS_sck_db_count; ++i) {
ResetSession(i);
}
if (FLAGS_sck_footer_unique_id) {
// For footer unique ID, this tracks process-wide generated SST file
// count.
dbs_[0].orig_file_number = 0;
}
}
private:
// Use db_session_id and orig_file_number from TableProperties
std::unique_ptr<TableProperties[]> dbs_;
std::unique_ptr<uint64_t[]> table_;
uint64_t process_count_ = 0;
uint64_t session_count_ = 0;
uint64_t collisions_ = 0;
uint32_t restart_nfiles_ = 0;
double multiplier_ = 0.0;
};
int cache_bench_tool(int argc, char** argv) {
ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_stress_cache_key) {
// Alternate tool
StressCacheKey().Run();
return 0;
}
if (FLAGS_threads <= 0) {
fprintf(stderr, "threads number <= 0\n");
exit(1);
}
ROCKSDB_NAMESPACE::CacheBench bench;
if (FLAGS_populate_cache) {
bench.PopulateCache();
printf("Population complete\n");
printf("----------------------------\n");
}
if (bench.Run()) {
return 0;
} else {
return 1;
}
} // namespace ROCKSDB_NAMESPACE
} // namespace ROCKSDB_NAMESPACE
#endif // GFLAGS

View File

@ -1,128 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/cache_entry_roles.h"
#include <mutex>
#include "port/lang.h"
namespace ROCKSDB_NAMESPACE {
std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
"DataBlock",
"FilterBlock",
"FilterMetaBlock",
"DeprecatedFilterBlock",
"IndexBlock",
"OtherBlock",
"WriteBuffer",
"CompressionDictionaryBuildingBuffer",
"FilterConstruction",
"BlockBasedTableReader",
"Misc",
}};
std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
"data-block",
"filter-block",
"filter-meta-block",
"deprecated-filter-block",
"index-block",
"other-block",
"write-buffer",
"compression-dictionary-building-buffer",
"filter-construction",
"block-based-table-reader",
"misc",
}};
const std::string& GetCacheEntryRoleName(CacheEntryRole role) {
return kCacheEntryRoleToHyphenString[static_cast<size_t>(role)];
}
const std::string& BlockCacheEntryStatsMapKeys::CacheId() {
static const std::string kCacheId = "id";
return kCacheId;
}
const std::string& BlockCacheEntryStatsMapKeys::CacheCapacityBytes() {
static const std::string kCacheCapacityBytes = "capacity";
return kCacheCapacityBytes;
}
const std::string&
BlockCacheEntryStatsMapKeys::LastCollectionDurationSeconds() {
static const std::string kLastCollectionDurationSeconds =
"secs_for_last_collection";
return kLastCollectionDurationSeconds;
}
const std::string& BlockCacheEntryStatsMapKeys::LastCollectionAgeSeconds() {
static const std::string kLastCollectionAgeSeconds =
"secs_since_last_collection";
return kLastCollectionAgeSeconds;
}
namespace {
std::string GetPrefixedCacheEntryRoleName(const std::string& prefix,
CacheEntryRole role) {
const std::string& role_name = GetCacheEntryRoleName(role);
std::string prefixed_role_name;
prefixed_role_name.reserve(prefix.size() + role_name.size());
prefixed_role_name.append(prefix);
prefixed_role_name.append(role_name);
return prefixed_role_name;
}
} // namespace
std::string BlockCacheEntryStatsMapKeys::EntryCount(CacheEntryRole role) {
const static std::string kPrefix = "count.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
std::string BlockCacheEntryStatsMapKeys::UsedBytes(CacheEntryRole role) {
const static std::string kPrefix = "bytes.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
std::string BlockCacheEntryStatsMapKeys::UsedPercent(CacheEntryRole role) {
const static std::string kPrefix = "percent.";
return GetPrefixedCacheEntryRoleName(kPrefix, role);
}
namespace {
struct Registry {
std::mutex mutex;
UnorderedMap<Cache::DeleterFn, CacheEntryRole> role_map;
void Register(Cache::DeleterFn fn, CacheEntryRole role) {
std::lock_guard<std::mutex> lock(mutex);
role_map[fn] = role;
}
UnorderedMap<Cache::DeleterFn, CacheEntryRole> Copy() {
std::lock_guard<std::mutex> lock(mutex);
return role_map;
}
};
Registry& GetRegistry() {
STATIC_AVOID_DESTRUCTION(Registry, registry);
return registry;
}
} // namespace
void RegisterCacheDeleterRole(Cache::DeleterFn fn, CacheEntryRole role) {
GetRegistry().Register(fn, role);
}
UnorderedMap<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap() {
return GetRegistry().Copy();
}
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,103 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <array>
#include <cstdint>
#include <memory>
#include <type_traits>
#include "rocksdb/cache.h"
#include "util/hash_containers.h"
namespace ROCKSDB_NAMESPACE {
extern std::array<std::string, kNumCacheEntryRoles>
kCacheEntryRoleToCamelString;
extern std::array<std::string, kNumCacheEntryRoles>
kCacheEntryRoleToHyphenString;
// To associate cache entries with their role, we use a hack on the
// existing Cache interface. Because the deleter of an entry can authenticate
// the code origin of an entry, we can elaborate the choice of deleter to
// also encode role information, without inferring false role information
// from entries not choosing to encode a role.
//
// The rest of this file is for handling mappings between deleters and
// roles.
// To infer a role from a deleter, the deleter must be registered. This
// can be done "manually" with this function. This function is thread-safe,
// and the registration mappings go into private but static storage. (Note
// that DeleterFn is a function pointer, not std::function. Registrations
// should not be too many.)
void RegisterCacheDeleterRole(Cache::DeleterFn fn, CacheEntryRole role);
// Gets a copy of the registered deleter -> role mappings. This is the only
// function for reading the mappings made with RegisterCacheDeleterRole.
// Why only this interface for reading?
// * This function has to be thread safe, which could incur substantial
// overhead. We should not pay this overhead for every deleter look-up.
// * This is suitable for preparing for batch operations, like with
// CacheEntryStatsCollector.
// * The number of mappings should be sufficiently small (dozens).
UnorderedMap<Cache::DeleterFn, CacheEntryRole> CopyCacheDeleterRoleMap();
// ************************************************************** //
// An automatic registration infrastructure. This enables code
// to simply ask for a deleter associated with a particular type
// and role, and registration is automatic. In a sense, this is
// a small dependency injection infrastructure, because linking
// in new deleter instantiations is essentially sufficient for
// making stats collection (using CopyCacheDeleterRoleMap) aware
// of them.
namespace cache_entry_roles_detail {
template <typename T, CacheEntryRole R>
struct RegisteredDeleter {
RegisteredDeleter() { RegisterCacheDeleterRole(Delete, R); }
// These have global linkage to help ensure compiler optimizations do not
// break uniqueness for each <T,R>
static void Delete(const Slice& /* key */, void* value) {
// Supports T == Something[], unlike delete operator
std::default_delete<T>()(
static_cast<typename std::remove_extent<T>::type*>(value));
}
};
template <CacheEntryRole R>
struct RegisteredNoopDeleter {
RegisteredNoopDeleter() { RegisterCacheDeleterRole(Delete, R); }
static void Delete(const Slice& /* key */, void* /* value */) {
// Here was `assert(value == nullptr);` but we can also put pointers
// to static data in Cache, for testing at least.
}
};
} // namespace cache_entry_roles_detail
// Get an automatically registered deleter for value type T and role R.
// Based on C++ semantics, registration is invoked exactly once in a
// thread-safe way on first call to this function, for each <T, R>.
template <typename T, CacheEntryRole R>
Cache::DeleterFn GetCacheEntryDeleterForRole() {
static cache_entry_roles_detail::RegisteredDeleter<T, R> reg;
return reg.Delete;
}
// Get an automatically registered no-op deleter (value should be nullptr)
// and associated with role R. This is used for Cache "reservation" entries
// such as for WriteBufferManager.
template <CacheEntryRole R>
Cache::DeleterFn GetNoopDeleterForRole() {
static cache_entry_roles_detail::RegisteredNoopDeleter<R> reg;
return reg.Delete;
}
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,183 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <array>
#include <cstdint>
#include <memory>
#include <mutex>
#include "cache/cache_helpers.h"
#include "cache/cache_key.h"
#include "port/lang.h"
#include "rocksdb/cache.h"
#include "rocksdb/status.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
#include "util/coding_lean.h"
namespace ROCKSDB_NAMESPACE {
// A generic helper object for gathering stats about cache entries by
// iterating over them with ApplyToAllEntries. This class essentially
// solves the problem of slowing down a Cache with too many stats
// collectors that could be sharing stat results, such as from multiple
// column families or multiple DBs sharing a Cache. We employ a few
// mitigations:
// * Only one collector for a particular kind of Stats is alive
// for each Cache. This is guaranteed using the Cache itself to hold
// the collector.
// * A mutex ensures only one thread is gathering stats for this
// collector.
// * The most recent gathered stats are saved and simply copied to
// satisfy requests within a time window (default: 3 minutes) of
// completion of the most recent stat gathering.
//
// Template parameter Stats must be copyable and trivially constructable,
// as well as...
// concept Stats {
// // Notification before applying callback to all entries
// void BeginCollection(Cache*, SystemClock*, uint64_t start_time_micros);
// // Get the callback to apply to all entries. `callback`
// // type must be compatible with Cache::ApplyToAllEntries
// callback GetEntryCallback();
// // Notification after applying callback to all entries
// void EndCollection(Cache*, SystemClock*, uint64_t end_time_micros);
// // Notification that a collection was skipped because of
// // sufficiently recent saved results.
// void SkippedCollection();
// }
template <class Stats>
class CacheEntryStatsCollector {
public:
// Gather and save stats if saved stats are too old. (Use GetStats() to
// read saved stats.)
//
// Maximum allowed age for a "hit" on saved results is determined by the
// two interval parameters. Both set to 0 forces a re-scan. For example
// with min_interval_seconds=300 and min_interval_factor=100, if the last
// scan took 10s, we would only rescan ("miss") if the age in seconds of
// the saved results is > max(300, 100*10).
// Justification: scans can vary wildly in duration, e.g. from 0.02 sec
// to as much as 20 seconds, so we want to be able to cap the absolute
// and relative frequency of scans.
void CollectStats(int min_interval_seconds, int min_interval_factor) {
// Waits for any pending reader or writer (collector)
std::lock_guard<std::mutex> lock(working_mutex_);
uint64_t max_age_micros =
static_cast<uint64_t>(std::max(min_interval_seconds, 0)) * 1000000U;
if (last_end_time_micros_ > last_start_time_micros_ &&
min_interval_factor > 0) {
max_age_micros = std::max(
max_age_micros, min_interval_factor * (last_end_time_micros_ -
last_start_time_micros_));
}
uint64_t start_time_micros = clock_->NowMicros();
if ((start_time_micros - last_end_time_micros_) > max_age_micros) {
last_start_time_micros_ = start_time_micros;
working_stats_.BeginCollection(cache_, clock_, start_time_micros);
cache_->ApplyToAllEntries(working_stats_.GetEntryCallback(), {});
TEST_SYNC_POINT_CALLBACK(
"CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries", nullptr);
uint64_t end_time_micros = clock_->NowMicros();
last_end_time_micros_ = end_time_micros;
working_stats_.EndCollection(cache_, clock_, end_time_micros);
} else {
working_stats_.SkippedCollection();
}
// Save so that we don't need to wait for an outstanding collection in
// order to make of copy of the last saved stats
std::lock_guard<std::mutex> lock2(saved_mutex_);
saved_stats_ = working_stats_;
}
// Gets saved stats, regardless of age
void GetStats(Stats *stats) {
std::lock_guard<std::mutex> lock(saved_mutex_);
*stats = saved_stats_;
}
Cache *GetCache() const { return cache_; }
// Gets or creates a shared instance of CacheEntryStatsCollector in the
// cache itself, and saves into `ptr`. This shared_ptr will hold the
// entry in cache until all refs are destroyed.
static Status GetShared(Cache *cache, SystemClock *clock,
std::shared_ptr<CacheEntryStatsCollector> *ptr) {
const Slice &cache_key = GetCacheKey();
Cache::Handle *h = cache->Lookup(cache_key);
if (h == nullptr) {
// Not yet in cache, but Cache doesn't provide a built-in way to
// avoid racing insert. So we double-check under a shared mutex,
// inspired by TableCache.
STATIC_AVOID_DESTRUCTION(std::mutex, static_mutex);
std::lock_guard<std::mutex> lock(static_mutex);
h = cache->Lookup(cache_key);
if (h == nullptr) {
auto new_ptr = new CacheEntryStatsCollector(cache, clock);
// TODO: non-zero charge causes some tests that count block cache
// usage to go flaky. Fix the problem somehow so we can use an
// accurate charge.
size_t charge = 0;
Status s = cache->Insert(cache_key, new_ptr, charge, Deleter, &h,
Cache::Priority::HIGH);
if (!s.ok()) {
assert(h == nullptr);
delete new_ptr;
return s;
}
}
}
// If we reach here, shared entry is in cache with handle `h`.
assert(cache->GetDeleter(h) == Deleter);
// Build an aliasing shared_ptr that keeps `ptr` in cache while there
// are references.
*ptr = MakeSharedCacheHandleGuard<CacheEntryStatsCollector>(cache, h);
return Status::OK();
}
private:
explicit CacheEntryStatsCollector(Cache *cache, SystemClock *clock)
: saved_stats_(),
working_stats_(),
last_start_time_micros_(0),
last_end_time_micros_(/*pessimistic*/ 10000000),
cache_(cache),
clock_(clock) {}
static void Deleter(const Slice &, void *value) {
delete static_cast<CacheEntryStatsCollector *>(value);
}
static const Slice &GetCacheKey() {
// For each template instantiation
static CacheKey ckey = CacheKey::CreateUniqueForProcessLifetime();
static Slice ckey_slice = ckey.AsSlice();
return ckey_slice;
}
std::mutex saved_mutex_;
Stats saved_stats_;
std::mutex working_mutex_;
Stats working_stats_;
uint64_t last_start_time_micros_;
uint64_t last_end_time_micros_;
Cache *const cache_;
SystemClock *const clock_;
};
} // namespace ROCKSDB_NAMESPACE

125
cache/cache_helpers.h vendored
View File

@ -1,125 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cassert>
#include "rocksdb/cache.h"
#include "rocksdb/rocksdb_namespace.h"
namespace ROCKSDB_NAMESPACE {
// Returns the cached value given a cache handle.
template <typename T>
T* GetFromCacheHandle(Cache* cache, Cache::Handle* handle) {
assert(cache);
assert(handle);
return static_cast<T*>(cache->Value(handle));
}
// Simple generic deleter for Cache (to be used with Cache::Insert).
template <typename T>
void DeleteCacheEntry(const Slice& /* key */, void* value) {
delete static_cast<T*>(value);
}
// Turns a T* into a Slice so it can be used as a key with Cache.
template <typename T>
Slice GetSlice(const T* t) {
return Slice(reinterpret_cast<const char*>(t), sizeof(T));
}
// Generic resource management object for cache handles that releases the handle
// when destroyed. Has unique ownership of the handle, so copying it is not
// allowed, while moving it transfers ownership.
template <typename T>
class CacheHandleGuard {
public:
CacheHandleGuard() = default;
CacheHandleGuard(Cache* cache, Cache::Handle* handle)
: cache_(cache),
handle_(handle),
value_(GetFromCacheHandle<T>(cache, handle)) {
assert(cache_ && handle_ && value_);
}
CacheHandleGuard(const CacheHandleGuard&) = delete;
CacheHandleGuard& operator=(const CacheHandleGuard&) = delete;
CacheHandleGuard(CacheHandleGuard&& rhs) noexcept
: cache_(rhs.cache_), handle_(rhs.handle_), value_(rhs.value_) {
assert((!cache_ && !handle_ && !value_) || (cache_ && handle_ && value_));
rhs.ResetFields();
}
CacheHandleGuard& operator=(CacheHandleGuard&& rhs) noexcept {
if (this == &rhs) {
return *this;
}
ReleaseHandle();
cache_ = rhs.cache_;
handle_ = rhs.handle_;
value_ = rhs.value_;
assert((!cache_ && !handle_ && !value_) || (cache_ && handle_ && value_));
rhs.ResetFields();
return *this;
}
~CacheHandleGuard() { ReleaseHandle(); }
bool IsEmpty() const { return !handle_; }
Cache* GetCache() const { return cache_; }
Cache::Handle* GetCacheHandle() const { return handle_; }
T* GetValue() const { return value_; }
void Reset() {
ReleaseHandle();
ResetFields();
}
private:
void ReleaseHandle() {
if (IsEmpty()) {
return;
}
assert(cache_);
cache_->Release(handle_);
}
void ResetFields() {
cache_ = nullptr;
handle_ = nullptr;
value_ = nullptr;
}
private:
Cache* cache_ = nullptr;
Cache::Handle* handle_ = nullptr;
T* value_ = nullptr;
};
// Build an aliasing shared_ptr that keeps `handle` in cache while there
// are references, but the pointer is to the value for that cache entry,
// which must be of type T. This is copyable, unlike CacheHandleGuard, but
// does not provide access to caching details.
template <typename T>
std::shared_ptr<T> MakeSharedCacheHandleGuard(Cache* cache,
Cache::Handle* handle) {
auto wrapper = std::make_shared<CacheHandleGuard<T>>(cache, handle);
return std::shared_ptr<T>(wrapper, static_cast<T*>(cache->Value(handle)));
}
} // namespace ROCKSDB_NAMESPACE

344
cache/cache_key.cc vendored
View File

@ -1,344 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/cache_key.h"
#include <algorithm>
#include <atomic>
#include "rocksdb/cache.h"
#include "table/unique_id_impl.h"
#include "util/hash.h"
#include "util/math.h"
namespace ROCKSDB_NAMESPACE {
// Value space plan for CacheKey:
//
// session_etc64_ | offset_etc64_ | Only generated by
// ---------------+---------------+------------------------------------------
// 0 | 0 | Reserved for "empty" CacheKey()
// 0 | > 0, < 1<<63 | CreateUniqueForCacheLifetime
// 0 | >= 1<<63 | CreateUniqueForProcessLifetime
// > 0 | any | OffsetableCacheKey.WithOffset
CacheKey CacheKey::CreateUniqueForCacheLifetime(Cache *cache) {
// +1 so that we can reserve all zeros for "unset" cache key
uint64_t id = cache->NewId() + 1;
// Ensure we don't collide with CreateUniqueForProcessLifetime
assert((id >> 63) == 0U);
return CacheKey(0, id);
}
CacheKey CacheKey::CreateUniqueForProcessLifetime() {
// To avoid colliding with CreateUniqueForCacheLifetime, assuming
// Cache::NewId counts up from zero, here we count down from UINT64_MAX.
// If this ever becomes a point of contention, we could sub-divide the
// space and use CoreLocalArray.
static std::atomic<uint64_t> counter{UINT64_MAX};
uint64_t id = counter.fetch_sub(1, std::memory_order_relaxed);
// Ensure we don't collide with CreateUniqueForCacheLifetime
assert((id >> 63) == 1U);
return CacheKey(0, id);
}
// Value plan for CacheKeys from OffsetableCacheKey, assuming that
// db_session_ids are generated from a base_session_id and
// session_id_counter (by SemiStructuredUniqueIdGen+EncodeSessionId
// in DBImpl::GenerateDbSessionId):
//
// Conceptual inputs:
// db_id (unstructured, from GenerateRawUniqueId or equiv)
// * could be shared between cloned DBs but rare
// * could be constant, if session id suffices
// base_session_id (unstructured, from GenerateRawUniqueId)
// session_id_counter (structured)
// * usually much smaller than 2**24
// file_number (structured)
// * usually smaller than 2**24
// offset_in_file (structured, might skip lots of values)
// * usually smaller than 2**32
// max_offset determines placement of file_number to prevent
// overlapping with offset
//
// Outputs come from bitwise-xor of the constituent pieces, low bits on left:
//
// |------------------------- session_etc64 -------------------------|
// | +++++++++++++++ base_session_id (lower 64 bits) +++++++++++++++ |
// |-----------------------------------------------------------------|
// | session_id_counter ...| |
// |-----------------------------------------------------------------|
// | | ... file_number |
// | | overflow & meta |
// |-----------------------------------------------------------------|
//
//
// |------------------------- offset_etc64 --------------------------|
// | hash of: ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
// | * base_session_id (upper ~39 bits) |
// | * db_id (~122 bits entropy) |
// |-----------------------------------------------------------------|
// | offset_in_file ............... | |
// |-----------------------------------------------------------------|
// | | file_number, 0-3 |
// | | lower bytes |
// |-----------------------------------------------------------------|
//
// Based on max_offset, a maximal number of bytes 0..3 is chosen for
// including from lower bits of file_number in offset_etc64. The choice
// is encoded in two bits of metadata going into session_etc64, though
// the common case of 3 bytes is encoded as 0 so that session_etc64
// is unmodified by file_number concerns in the common case.
//
// There is nothing preventing "file number overflow & meta" from meeting
// and overlapping with session_id_counter, but reaching such a case requires
// an intractable combination of large file offsets (thus at least some large
// files), large file numbers (thus large number of files generated), and
// large number of session IDs generated in a single process. A trillion each
// (2**40) of session ids, offsets, and file numbers comes to 120 bits.
// With two bits of metadata and byte granularity, this is on the verge of
// overlap, but even in the overlap case, it doesn't seem likely that
// a file from billions of files or session ids ago will still be live
// or cached.
//
// In fact, if our SST files are all < 4TB (see
// BlockBasedTable::kMaxFileSizeStandardEncoding), then SST files generated
// in a single process are guaranteed to have unique cache keys, unless/until
// number session ids * max file number = 2**86, e.g. 1 trillion DB::Open in
// a single process and 64 trillion files generated. Even at that point, to
// see a collision we would need a miraculous re-synchronization of session
// id and file number, along with a live file or stale cache entry from
// trillions of files ago.
//
// How https://github.com/pdillinger/unique_id applies here:
// Every bit of output always includes "unstructured" uniqueness bits and
// often combines with "structured" uniqueness bits. The "unstructured" bits
// change infrequently: only when we cannot guarantee our state tracking for
// "structured" uniqueness hasn't been cloned. Using a static
// SemiStructuredUniqueIdGen for db_session_ids, this means we only get an
// "all new" session id when a new process uses RocksDB. (Between processes,
// we don't know if a DB or other persistent storage has been cloned. We
// assume that if VM hot cloning is used, subsequently generated SST files
// do not interact.) Within a process, only the session_lower of the
// db_session_id changes incrementally ("structured" uniqueness).
//
// This basically means that our offsets, counters and file numbers allow us
// to do somewhat "better than random" (birthday paradox) while in the
// degenerate case of completely new session for each tiny file, we still
// have strong uniqueness properties from the birthday paradox, with ~103
// bit session IDs or up to 128 bits entropy with different DB IDs sharing a
// cache.
//
// More collision probability analysis:
// Suppose a RocksDB host generates (generously) 2 GB/s (10TB data, 17 DWPD)
// with average process/session lifetime of (pessimistically) 4 minutes.
// In 180 days (generous allowable data lifespan), we generate 31 million GB
// of data, or 2^55 bytes, and 2^16 "all new" session IDs.
//
// First, suppose this is in a single DB (lifetime 180 days):
// 128 bits cache key size
// - 55 <- ideal size for byte offsets + file numbers
// - 2 <- bits for offsets and file numbers not exactly powers of two
// - 2 <- bits for file number encoding metadata
// + 2 <- bits saved not using byte offsets in BlockBasedTable::GetCacheKey
// ----
// 71 <- bits remaining for distinguishing session IDs
// The probability of a collision in 71 bits of session ID data is less than
// 1 in 2**(71 - (2 * 16)), or roughly 1 in a trillion. And this assumes all
// data from the last 180 days is in cache for potential collision, and that
// cache keys under each session id exhaustively cover the remaining 57 bits
// while in reality they'll only cover a small fraction of it.
//
// Although data could be transferred between hosts, each host has its own
// cache and we are already assuming a high rate of "all new" session ids.
// So this doesn't really change the collision calculation. Across a fleet
// of 1 million, each with <1 in a trillion collision possibility,
// fleetwide collision probability is <1 in a million.
//
// Now suppose we have many DBs per host, say 2**10, with same host-wide write
// rate and process/session lifetime. File numbers will be ~10 bits smaller
// and we will have 2**10 times as many session IDs because of simultaneous
// lifetimes. So now collision chance is less than 1 in 2**(81 - (2 * 26)),
// or roughly 1 in a billion.
//
// Suppose instead we generated random or hashed cache keys for each
// (compressed) block. For 1KB compressed block size, that is 2^45 cache keys
// in 180 days. Collision probability is more easily estimated at roughly
// 1 in 2**(128 - (2 * 45)) or roughly 1 in a trillion (assuming all
// data from the last 180 days is in cache, but NOT the other assumption
// for the 1 in a trillion estimate above).
//
//
// Collision probability estimation through simulation:
// A tool ./cache_bench -stress_cache_key broadly simulates host-wide cache
// activity over many months, by making some pessimistic simplifying
// assumptions. See class StressCacheKey in cache_bench_tool.cc for details.
// Here is some sample output with
// `./cache_bench -stress_cache_key -sck_keep_bits=40`:
//
// Total cache or DBs size: 32TiB Writing 925.926 MiB/s or 76.2939TiB/day
// Multiply by 9.22337e+18 to correct for simulation losses (but still
// assume whole file cached)
//
// These come from default settings of 2.5M files per day of 32 MB each, and
// `-sck_keep_bits=40` means that to represent a single file, we are only
// keeping 40 bits of the 128-bit (base) cache key. With file size of 2**25
// contiguous keys (pessimistic), our simulation is about 2\*\*(128-40-25) or
// about 9 billion billion times more prone to collision than reality.
//
// More default assumptions, relatively pessimistic:
// * 100 DBs in same process (doesn't matter much)
// * Re-open DB in same process (new session ID related to old session ID) on
// average every 100 files generated
// * Restart process (all new session IDs unrelated to old) 24 times per day
//
// After enough data, we get a result at the end (-sck_keep_bits=40):
//
// (keep 40 bits) 17 collisions after 2 x 90 days, est 10.5882 days between
// (9.76592e+19 corrected)
//
// If we believe the (pessimistic) simulation and the mathematical
// extrapolation, we would need to run a billion machines all for 97 billion
// days to expect a cache key collision. To help verify that our extrapolation
// ("corrected") is robust, we can make our simulation more precise with
// `-sck_keep_bits=41` and `42`, which takes more running time to get enough
// collision data:
//
// (keep 41 bits) 16 collisions after 4 x 90 days, est 22.5 days between
// (1.03763e+20 corrected)
// (keep 42 bits) 19 collisions after 10 x 90 days, est 47.3684 days between
// (1.09224e+20 corrected)
//
// The extrapolated prediction is very close. If anything, we might have some
// very small losses of structured data (see class StressCacheKey in
// cache_bench_tool.cc) leading to more accurate & more attractive prediction
// with more bits kept.
//
// With the `-sck_randomize` option, we can see that typical workloads like
// above have lower collision probability than "random" cache keys (note:
// offsets still non-randomized) by a modest amount (roughly 20x less collision
// prone than random), which should make us reasonably comfortable even in
// "degenerate" cases (e.g. repeatedly launch a process to generate 1 file
// with SstFileWriter):
//
// (rand 40 bits) 197 collisions after 1 x 90 days, est 0.456853 days between
// (4.21372e+18 corrected)
//
// We can see that with more frequent process restarts (all new session IDs),
// we get closer to the "random" cache key performance:
//
// (-sck_restarts_per_day=5000): 140 collisions after 1 x 90 days, ...
// (5.92931e+18 corrected)
//
// Other tests have been run to validate other conditions behave as expected,
// never behaving "worse than random" unless we start chopping off structured
// data.
//
//
// Conclusion: Even in extreme cases, rapidly burning through "all new" IDs
// that only arise when a new process is started, the chance of any cache key
// collisions in a giant fleet of machines is negligible. Especially when
// processes live for hours or days, the chance of a cache key collision is
// likely more plausibly due to bad hardware than to bad luck in random
// session ID data. Software defects are surely more likely to cause corruption
// than both of those.
//
// TODO: Nevertheless / regardless, an efficient way to detect (and thus
// quantify) block cache corruptions, including collisions, should be added.
OffsetableCacheKey::OffsetableCacheKey(const std::string &db_id,
const std::string &db_session_id,
uint64_t file_number,
uint64_t max_offset) {
#ifndef NDEBUG
max_offset_ = max_offset;
#endif
// Closely related to GetSstInternalUniqueId, but only need 128 bits and
// need to include an offset within the file.
// See also https://github.com/pdillinger/unique_id for background.
uint64_t session_upper = 0; // Assignment to appease clang-analyze
uint64_t session_lower = 0; // Assignment to appease clang-analyze
{
Status s = DecodeSessionId(db_session_id, &session_upper, &session_lower);
if (!s.ok()) {
// A reasonable fallback in case malformed
Hash2x64(db_session_id.data(), db_session_id.size(), &session_upper,
&session_lower);
}
}
// Hash the session upper (~39 bits entropy) and DB id (120+ bits entropy)
// for more global uniqueness entropy.
// (It is possible that many DBs descended from one common DB id are copied
// around and proliferate, in which case session id is critical, but it is
// more common for different DBs to have different DB ids.)
uint64_t db_hash = Hash64(db_id.data(), db_id.size(), session_upper);
// This establishes the db+session id part of the cache key.
//
// Exactly preserve (in common cases; see modifiers below) session lower to
// ensure that session ids generated during the same process lifetime are
// guaranteed unique.
//
// We put this first for CommonPrefixSlice(), so that a small-ish set of
// cache key prefixes to cover entries relevant to any DB.
session_etc64_ = session_lower;
// This provides extra entopy in case of different DB id or process
// generating a session id, but is also partly/variably obscured by
// file_number and offset (see below).
offset_etc64_ = db_hash;
// Into offset_etc64_ we are (eventually) going to pack & xor in an offset and
// a file_number, but we might need the file_number to overflow into
// session_etc64_. (There must only be one session_etc64_ value per
// file, and preferably shared among many files.)
//
// Figure out how many bytes of file_number we are going to be able to
// pack in with max_offset, though our encoding will only support packing
// in up to 3 bytes of file_number. (16M file numbers is enough for a new
// file number every second for half a year.)
int file_number_bytes_in_offset_etc =
(63 - FloorLog2(max_offset | 0x100000000U)) / 8;
int file_number_bits_in_offset_etc = file_number_bytes_in_offset_etc * 8;
// Assert two bits of metadata
assert(file_number_bytes_in_offset_etc >= 0 &&
file_number_bytes_in_offset_etc <= 3);
// Assert we couldn't have used a larger allowed number of bytes (shift
// would chop off bytes).
assert(file_number_bytes_in_offset_etc == 3 ||
(max_offset << (file_number_bits_in_offset_etc + 8) >>
(file_number_bits_in_offset_etc + 8)) != max_offset);
uint64_t mask = (uint64_t{1} << (file_number_bits_in_offset_etc)) - 1;
// Pack into high bits of etc so that offset can go in low bits of etc
// TODO: could be EndianSwapValue?
uint64_t offset_etc_modifier = ReverseBits(file_number & mask);
assert(offset_etc_modifier << file_number_bits_in_offset_etc == 0U);
// Overflow and 3 - byte count (likely both zero) go into session_id part
uint64_t session_etc_modifier =
(file_number >> file_number_bits_in_offset_etc << 2) |
static_cast<uint64_t>(3 - file_number_bytes_in_offset_etc);
// Packed into high bits to minimize interference with session id counter.
session_etc_modifier = ReverseBits(session_etc_modifier);
// Assert session_id part is only modified in extreme cases
assert(session_etc_modifier == 0 || file_number > /*3 bytes*/ 0xffffffU ||
max_offset > /*5 bytes*/ 0xffffffffffU);
// Xor in the modifiers
session_etc64_ ^= session_etc_modifier;
offset_etc64_ ^= offset_etc_modifier;
// Although DBImpl guarantees (in recent versions) that session_lower is not
// zero, that's not entirely sufficient to guarantee that session_etc64_ is
// not zero (so that the 0 case can be used by CacheKey::CreateUnique*)
if (session_etc64_ == 0U) {
session_etc64_ = session_upper | 1U;
}
assert(session_etc64_ != 0);
}
} // namespace ROCKSDB_NAMESPACE

132
cache/cache_key.h vendored
View File

@ -1,132 +0,0 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cstdint>
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/slice.h"
namespace ROCKSDB_NAMESPACE {
class Cache;
// A standard holder for fixed-size block cache keys (and for related caches).
// They are created through one of these, each using its own range of values:
// * CacheKey::CreateUniqueForCacheLifetime
// * CacheKey::CreateUniqueForProcessLifetime
// * Default ctor ("empty" cache key)
// * OffsetableCacheKey->WithOffset
//
// The first two use atomic counters to guarantee uniqueness over the given
// lifetime and the last uses a form of universally unique identifier for
// uniqueness with very high probabilty (and guaranteed for files generated
// during a single process lifetime).
//
// CacheKeys are currently used by calling AsSlice() to pass as a key to
// Cache. For performance, the keys are endianness-dependent (though otherwise
// portable). (Persistable cache entries are not intended to cross platforms.)
class CacheKey {
public:
// For convenience, constructs an "empty" cache key that is never returned
// by other means.
inline CacheKey() : session_etc64_(), offset_etc64_() {}
inline bool IsEmpty() const {
return (session_etc64_ == 0) & (offset_etc64_ == 0);
}
// Use this cache key as a Slice (byte order is endianness-dependent)
inline Slice AsSlice() const {
static_assert(sizeof(*this) == 16, "Standardized on 16-byte cache key");
assert(!IsEmpty());
return Slice(reinterpret_cast<const char *>(this), sizeof(*this));
}
// Create a CacheKey that is unique among others associated with this Cache
// instance. Depends on Cache::NewId. This is useful for block cache
// "reservations".
static CacheKey CreateUniqueForCacheLifetime(Cache *cache);
// Create a CacheKey that is unique among others for the lifetime of this
// process. This is useful for saving in a static data member so that
// different DB instances can agree on a cache key for shared entities,
// such as for CacheEntryStatsCollector.
static CacheKey CreateUniqueForProcessLifetime();
protected:
friend class OffsetableCacheKey;
CacheKey(uint64_t session_etc64, uint64_t offset_etc64)
: session_etc64_(session_etc64), offset_etc64_(offset_etc64) {}
uint64_t session_etc64_;
uint64_t offset_etc64_;
};
// A file-specific generator of cache keys, sometimes referred to as the
// "base" cache key for a file because all the cache keys for various offsets
// within the file are computed using simple arithmetic. The basis for the
// general approach is dicussed here: https://github.com/pdillinger/unique_id
// Heavily related to GetUniqueIdFromTableProperties.
//
// If the db_id, db_session_id, and file_number come from the file's table
// properties, then the keys will be stable across DB::Open/Close, backup/
// restore, import/export, etc.
//
// This class "is a" CacheKey only privately so that it is not misused as
// a ready-to-use CacheKey.
class OffsetableCacheKey : private CacheKey {
public:
// For convenience, constructs an "empty" cache key that should not be used.
inline OffsetableCacheKey() : CacheKey() {}
// Constructs an OffsetableCacheKey with the given information about a file.
// max_offset is based on file size (see WithOffset) and is required here to
// choose an appropriate (sub-)encoding. This constructor never generates an
// "empty" base key.
OffsetableCacheKey(const std::string &db_id, const std::string &db_session_id,
uint64_t file_number, uint64_t max_offset);
inline bool IsEmpty() const {
bool result = session_etc64_ == 0;
assert(!(offset_etc64_ > 0 && result));
return result;
}
// Construct a CacheKey for an offset within a file, which must be
// <= max_offset provided in constructor. An offset is not necessarily a
// byte offset if a smaller unique identifier of keyable offsets is used.
//
// This class was designed to make this hot code extremely fast.
inline CacheKey WithOffset(uint64_t offset) const {
assert(!IsEmpty());
assert(offset <= max_offset_);
return CacheKey(session_etc64_, offset_etc64_ ^ offset);
}
// The "common prefix" is a shared prefix for all the returned CacheKeys,
// that also happens to usually be the same among many files in the same DB,
// so is efficient and highly accurate (not perfectly) for DB-specific cache
// dump selection (but not file-specific).
static constexpr size_t kCommonPrefixSize = 8;
inline Slice CommonPrefixSlice() const {
static_assert(sizeof(session_etc64_) == kCommonPrefixSize,
"8 byte common prefix expected");
assert(!IsEmpty());
assert(&this->session_etc64_ == static_cast<const void *>(this));
return Slice(reinterpret_cast<const char *>(this), kCommonPrefixSize);
}
// For any max_offset <= this value, the same encoding scheme is guaranteed.
static constexpr uint64_t kMaxOffsetStandardEncoding = 0xffffffffffU;
private:
#ifndef NDEBUG
uint64_t max_offset_ = 0;
#endif
};
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,183 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/cache_reservation_manager.h"
#include <cassert>
#include <cstddef>
#include <cstring>
#include <memory>
#include "cache/cache_entry_roles.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "table/block_based/reader_common.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
template <CacheEntryRole R>
CacheReservationManagerImpl<R>::CacheReservationHandle::CacheReservationHandle(
std::size_t incremental_memory_used,
std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr)
: incremental_memory_used_(incremental_memory_used) {
assert(cache_res_mgr);
cache_res_mgr_ = cache_res_mgr;
}
template <CacheEntryRole R>
CacheReservationManagerImpl<
R>::CacheReservationHandle::~CacheReservationHandle() {
Status s = cache_res_mgr_->ReleaseCacheReservation(incremental_memory_used_);
s.PermitUncheckedError();
}
template <CacheEntryRole R>
CacheReservationManagerImpl<R>::CacheReservationManagerImpl(
std::shared_ptr<Cache> cache, bool delayed_decrease)
: delayed_decrease_(delayed_decrease),
cache_allocated_size_(0),
memory_used_(0) {
assert(cache != nullptr);
cache_ = cache;
}
template <CacheEntryRole R>
CacheReservationManagerImpl<R>::~CacheReservationManagerImpl() {
for (auto* handle : dummy_handles_) {
cache_->Release(handle, true);
}
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::UpdateCacheReservation(
std::size_t new_mem_used) {
memory_used_ = new_mem_used;
std::size_t cur_cache_allocated_size =
cache_allocated_size_.load(std::memory_order_relaxed);
if (new_mem_used == cur_cache_allocated_size) {
return Status::OK();
} else if (new_mem_used > cur_cache_allocated_size) {
Status s = IncreaseCacheReservation(new_mem_used);
return s;
} else {
// In delayed decrease mode, we don't decrease cache reservation
// untill the memory usage is less than 3/4 of what we reserve
// in the cache.
// We do this because
// (1) Dummy entry insertion is expensive in block cache
// (2) Delayed releasing previously inserted dummy entries can save such
// expensive dummy entry insertion on memory increase in the near future,
// which is likely to happen when the memory usage is greater than or equal
// to 3/4 of what we reserve
if (delayed_decrease_ && new_mem_used >= cur_cache_allocated_size / 4 * 3) {
return Status::OK();
} else {
Status s = DecreaseCacheReservation(new_mem_used);
return s;
}
}
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle>* handle) {
assert(handle);
Status s =
UpdateCacheReservation(GetTotalMemoryUsed() + incremental_memory_used);
(*handle).reset(new CacheReservationManagerImpl::CacheReservationHandle(
incremental_memory_used,
std::enable_shared_from_this<
CacheReservationManagerImpl<R>>::shared_from_this()));
return s;
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::ReleaseCacheReservation(
std::size_t incremental_memory_used) {
assert(GetTotalMemoryUsed() >= incremental_memory_used);
std::size_t updated_total_mem_used =
GetTotalMemoryUsed() - incremental_memory_used;
Status s = UpdateCacheReservation(updated_total_mem_used);
return s;
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::IncreaseCacheReservation(
std::size_t new_mem_used) {
Status return_status = Status::OK();
while (new_mem_used > cache_allocated_size_.load(std::memory_order_relaxed)) {
Cache::Handle* handle = nullptr;
return_status = cache_->Insert(GetNextCacheKey(), nullptr, kSizeDummyEntry,
GetNoopDeleterForRole<R>(), &handle);
if (return_status != Status::OK()) {
return return_status;
}
dummy_handles_.push_back(handle);
cache_allocated_size_ += kSizeDummyEntry;
}
return return_status;
}
template <CacheEntryRole R>
Status CacheReservationManagerImpl<R>::DecreaseCacheReservation(
std::size_t new_mem_used) {
Status return_status = Status::OK();
// Decrease to the smallest multiple of kSizeDummyEntry that is greater than
// or equal to new_mem_used We do addition instead of new_mem_used <=
// cache_allocated_size_.load(std::memory_order_relaxed) - kSizeDummyEntry to
// avoid underflow of size_t when cache_allocated_size_ = 0
while (new_mem_used + kSizeDummyEntry <=
cache_allocated_size_.load(std::memory_order_relaxed)) {
assert(!dummy_handles_.empty());
auto* handle = dummy_handles_.back();
cache_->Release(handle, true);
dummy_handles_.pop_back();
cache_allocated_size_ -= kSizeDummyEntry;
}
return return_status;
}
template <CacheEntryRole R>
std::size_t CacheReservationManagerImpl<R>::GetTotalReservedCacheSize() {
return cache_allocated_size_.load(std::memory_order_relaxed);
}
template <CacheEntryRole R>
std::size_t CacheReservationManagerImpl<R>::GetTotalMemoryUsed() {
return memory_used_;
}
template <CacheEntryRole R>
Slice CacheReservationManagerImpl<R>::GetNextCacheKey() {
// Calling this function will have the side-effect of changing the
// underlying cache_key_ that is shared among other keys generated from this
// fucntion. Therefore please make sure the previous keys are saved/copied
// before calling this function.
cache_key_ = CacheKey::CreateUniqueForCacheLifetime(cache_.get());
return cache_key_.AsSlice();
}
template <CacheEntryRole R>
Cache::DeleterFn CacheReservationManagerImpl<R>::TEST_GetNoopDeleterForRole() {
return GetNoopDeleterForRole<R>();
}
template class CacheReservationManagerImpl<
CacheEntryRole::kBlockBasedTableReader>;
template class CacheReservationManagerImpl<
CacheEntryRole::kCompressionDictionaryBuildingBuffer>;
template class CacheReservationManagerImpl<CacheEntryRole::kFilterConstruction>;
template class CacheReservationManagerImpl<CacheEntryRole::kMisc>;
template class CacheReservationManagerImpl<CacheEntryRole::kWriteBuffer>;
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,288 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>
#include "cache/cache_entry_roles.h"
#include "cache/cache_key.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
// CacheReservationManager is an interface for reserving cache space for the
// memory used
class CacheReservationManager {
public:
// CacheReservationHandle is for managing the lifetime of a cache reservation
// for an incremental amount of memory used (i.e, incremental_memory_used)
class CacheReservationHandle {
public:
virtual ~CacheReservationHandle() {}
};
virtual ~CacheReservationManager() {}
virtual Status UpdateCacheReservation(std::size_t new_memory_used) = 0;
virtual Status MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
*handle) = 0;
virtual std::size_t GetTotalReservedCacheSize() = 0;
virtual std::size_t GetTotalMemoryUsed() = 0;
};
// CacheReservationManagerImpl implements interface CacheReservationManager
// for reserving cache space for the memory used by inserting/releasing dummy
// entries in the cache.
//
// This class is NOT thread-safe, except that GetTotalReservedCacheSize()
// can be called without external synchronization.
template <CacheEntryRole R>
class CacheReservationManagerImpl
: public CacheReservationManager,
public std::enable_shared_from_this<CacheReservationManagerImpl<R>> {
public:
class CacheReservationHandle
: public CacheReservationManager::CacheReservationHandle {
public:
CacheReservationHandle(
std::size_t incremental_memory_used,
std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr);
~CacheReservationHandle() override;
private:
std::size_t incremental_memory_used_;
std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr_;
};
// Construct a CacheReservationManagerImpl
// @param cache The cache where dummy entries are inserted and released for
// reserving cache space
// @param delayed_decrease If set true, then dummy entries won't be released
// immediately when memory usage decreases.
// Instead, it will be released when the memory usage
// decreases to 3/4 of what we have reserved so far.
// This is for saving some future dummy entry
// insertion when memory usage increases are likely to
// happen in the near future.
//
// REQUIRED: cache is not nullptr
explicit CacheReservationManagerImpl(std::shared_ptr<Cache> cache,
bool delayed_decrease = false);
// no copy constructor, copy assignment, move constructor, move assignment
CacheReservationManagerImpl(const CacheReservationManagerImpl &) = delete;
CacheReservationManagerImpl &operator=(const CacheReservationManagerImpl &) =
delete;
CacheReservationManagerImpl(CacheReservationManagerImpl &&) = delete;
CacheReservationManagerImpl &operator=(CacheReservationManagerImpl &&) =
delete;
~CacheReservationManagerImpl() override;
// One of the two ways of reserving/releasing cache space,
// see MakeCacheReservation() for the other.
//
// Use ONLY one of these two ways to prevent unexpected behavior.
//
// Insert and release dummy entries in the cache to
// match the size of total dummy entries with the least multiple of
// kSizeDummyEntry greater than or equal to new_mem_used
//
// Insert dummy entries if new_memory_used > cache_allocated_size_;
//
// Release dummy entries if new_memory_used < cache_allocated_size_
// (and new_memory_used < cache_allocated_size_ * 3/4
// when delayed_decrease is set true);
//
// Keey dummy entries the same if (1) new_memory_used == cache_allocated_size_
// or (2) new_memory_used is in the interval of
// [cache_allocated_size_ * 3/4, cache_allocated_size) when delayed_decrease
// is set true.
//
// @param new_memory_used The number of bytes used by new memory
// The most recent new_memoy_used passed in will be returned
// in GetTotalMemoryUsed() even when the call return non-ok status.
//
// Since the class is NOT thread-safe, external synchronization on the
// order of calling UpdateCacheReservation() is needed if you want
// GetTotalMemoryUsed() indeed returns the latest memory used.
//
// @return On inserting dummy entries, it returns Status::OK() if all dummy
// entry insertions succeed.
// Otherwise, it returns the first non-ok status;
// On releasing dummy entries, it always returns Status::OK().
// On keeping dummy entries the same, it always returns Status::OK().
Status UpdateCacheReservation(std::size_t new_memory_used) override;
// One of the two ways of reserving cache space and releasing is done through
// destruction of CacheReservationHandle.
// See UpdateCacheReservation() for the other way.
//
// Use ONLY one of these two ways to prevent unexpected behavior.
//
// Insert dummy entries in the cache for the incremental memory usage
// to match the size of total dummy entries with the least multiple of
// kSizeDummyEntry greater than or equal to the total memory used.
//
// A CacheReservationHandle is returned as an output parameter.
// The reserved dummy entries are automatically released on the destruction of
// this handle, which achieves better RAII per cache reservation.
//
// WARNING: Deallocate all the handles of the CacheReservationManager object
// before deallocating the object to prevent unexpected behavior.
//
// @param incremental_memory_used The number of bytes increased in memory
// usage.
//
// Calling GetTotalMemoryUsed() afterward will return the total memory
// increased by this number, even when calling MakeCacheReservation()
// returns non-ok status.
//
// Since the class is NOT thread-safe, external synchronization in
// calling MakeCacheReservation() is needed if you want
// GetTotalMemoryUsed() indeed returns the latest memory used.
//
// @param handle An pointer to std::unique_ptr<CacheReservationHandle> that
// manages the lifetime of the cache reservation represented by the
// handle.
//
// @return It returns Status::OK() if all dummy
// entry insertions succeed.
// Otherwise, it returns the first non-ok status;
//
// REQUIRES: handle != nullptr
Status MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
override;
// Return the size of the cache (which is a multiple of kSizeDummyEntry)
// successfully reserved by calling UpdateCacheReservation().
//
// When UpdateCacheReservation() returns non-ok status,
// calling GetTotalReservedCacheSize() after that might return a slightly
// smaller number than the actual reserved cache size due to
// the returned number will always be a multiple of kSizeDummyEntry
// and cache full might happen in the middle of inserting a dummy entry.
std::size_t GetTotalReservedCacheSize() override;
// Return the latest total memory used indicated by the most recent call of
// UpdateCacheReservation(std::size_t new_memory_used);
std::size_t GetTotalMemoryUsed() override;
static constexpr std::size_t GetDummyEntrySize() { return kSizeDummyEntry; }
// For testing only - it is to help ensure the NoopDeleterForRole<R>
// accessed from CacheReservationManagerImpl and the one accessed from the
// test are from the same translation units
static Cache::DeleterFn TEST_GetNoopDeleterForRole();
private:
static constexpr std::size_t kSizeDummyEntry = 256 * 1024;
Slice GetNextCacheKey();
Status ReleaseCacheReservation(std::size_t incremental_memory_used);
Status IncreaseCacheReservation(std::size_t new_mem_used);
Status DecreaseCacheReservation(std::size_t new_mem_used);
std::shared_ptr<Cache> cache_;
bool delayed_decrease_;
std::atomic<std::size_t> cache_allocated_size_;
std::size_t memory_used_;
std::vector<Cache::Handle *> dummy_handles_;
CacheKey cache_key_;
};
class ConcurrentCacheReservationManager
: public CacheReservationManager,
public std::enable_shared_from_this<ConcurrentCacheReservationManager> {
public:
class CacheReservationHandle
: public CacheReservationManager::CacheReservationHandle {
public:
CacheReservationHandle(
std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr,
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
cache_res_handle) {
assert(cache_res_mgr && cache_res_handle);
cache_res_mgr_ = cache_res_mgr;
cache_res_handle_ = std::move(cache_res_handle);
}
~CacheReservationHandle() override {
std::lock_guard<std::mutex> lock(cache_res_mgr_->cache_res_mgr_mu_);
cache_res_handle_.reset();
}
private:
std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr_;
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
cache_res_handle_;
};
explicit ConcurrentCacheReservationManager(
std::shared_ptr<CacheReservationManager> cache_res_mgr) {
cache_res_mgr_ = std::move(cache_res_mgr);
}
ConcurrentCacheReservationManager(const ConcurrentCacheReservationManager &) =
delete;
ConcurrentCacheReservationManager &operator=(
const ConcurrentCacheReservationManager &) = delete;
ConcurrentCacheReservationManager(ConcurrentCacheReservationManager &&) =
delete;
ConcurrentCacheReservationManager &operator=(
ConcurrentCacheReservationManager &&) = delete;
~ConcurrentCacheReservationManager() override {}
inline Status UpdateCacheReservation(std::size_t new_memory_used) override {
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
return cache_res_mgr_->UpdateCacheReservation(new_memory_used);
}
inline Status MakeCacheReservation(
std::size_t incremental_memory_used,
std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
override {
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
wrapped_handle;
Status s;
{
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
s = cache_res_mgr_->MakeCacheReservation(incremental_memory_used,
&wrapped_handle);
}
(*handle).reset(
new ConcurrentCacheReservationManager::CacheReservationHandle(
std::enable_shared_from_this<
ConcurrentCacheReservationManager>::shared_from_this(),
std::move(wrapped_handle)));
return s;
}
inline std::size_t GetTotalReservedCacheSize() override {
return cache_res_mgr_->GetTotalReservedCacheSize();
}
inline std::size_t GetTotalMemoryUsed() override {
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
return cache_res_mgr_->GetTotalMemoryUsed();
}
private:
std::mutex cache_res_mgr_mu_;
std::shared_ptr<CacheReservationManager> cache_res_mgr_;
};
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,468 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/cache_reservation_manager.h"
#include <cstddef>
#include <cstring>
#include <memory>
#include "cache/cache_entry_roles.h"
#include "rocksdb/cache.h"
#include "rocksdb/slice.h"
#include "test_util/testharness.h"
#include "util/coding.h"
namespace ROCKSDB_NAMESPACE {
class CacheReservationManagerTest : public ::testing::Test {
protected:
static constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
static constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
static constexpr int kNumShardBits = 0; // 2^0 shard
static constexpr std::size_t kMetaDataChargeOverhead = 10000;
std::shared_ptr<Cache> cache = NewLRUCache(kCacheCapacity, kNumShardBits);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng;
CacheReservationManagerTest() {
test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache);
}
};
TEST_F(CacheReservationManagerTest, GenerateCacheKey) {
std::size_t new_mem_used = 1 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead);
// Next unique Cache key
CacheKey ckey = CacheKey::CreateUniqueForCacheLifetime(cache.get());
// Get to the underlying values
using PairU64 = std::array<uint64_t, 2>;
auto& ckey_pair = *reinterpret_cast<PairU64*>(&ckey);
// Back it up to the one used by CRM (using CacheKey implementation details)
ckey_pair[1]--;
// Specific key (subject to implementation details)
EXPECT_EQ(ckey_pair, PairU64({0, 2}));
Cache::Handle* handle = cache->Lookup(ckey.AsSlice());
EXPECT_NE(handle, nullptr)
<< "Failed to generate the cache key for the dummy entry correctly";
// Clean up the returned handle from Lookup() to prevent memory leak
cache->Release(handle);
}
TEST_F(CacheReservationManagerTest, KeepCacheReservationTheSame) {
std::size_t new_mem_used = 1 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
std::size_t initial_pinned_usage = cache->GetPinnedUsage();
ASSERT_GE(initial_pinned_usage, 1 * kSizeDummyEntry);
ASSERT_LT(initial_pinned_usage,
1 * kSizeDummyEntry + kMetaDataChargeOverhead);
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to keep cache reservation the same when new_mem_used equals "
"to current cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when new_mem_used equals to current "
"cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly when new_mem_used "
"equals to current cache reservation";
EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
<< "Failed to keep underlying dummy entries the same when new_mem_used "
"equals to current cache reservation";
}
TEST_F(CacheReservationManagerTest,
IncreaseCacheReservationByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to increase cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation increase correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry)
<< "Failed to increase underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to increase underlying dummy entries in cache correctly";
}
TEST_F(CacheReservationManagerTest,
IncreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry + kSizeDummyEntry / 2;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to increase cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
3 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation increase correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 3 * kSizeDummyEntry)
<< "Failed to increase underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
3 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to increase underlying dummy entries in cache correctly";
}
TEST(CacheReservationManagerIncreaseReservcationOnFullCacheTest,
IncreaseCacheReservationOnFullCache) {
;
constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
constexpr std::size_t kSmallCacheCapacity = 4 * kSizeDummyEntry;
constexpr std::size_t kBigCacheCapacity = 4096 * kSizeDummyEntry;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kSmallCacheCapacity;
lo.num_shard_bits = 0; // 2^0 shard
lo.strict_capacity_limit = true;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache);
std::size_t new_mem_used = kSmallCacheCapacity + 1;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::Incomplete())
<< "Failed to return status to indicate failure of dummy entry insertion "
"during cache reservation on full cache";
EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep correctly before cache resevation failure happens "
"due to full cache";
EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(),
kSmallCacheCapacity)
<< "Failed to bookkeep correctly (i.e, bookkeep only successful dummy "
"entry insertions) when encountering cache resevation failure due to "
"full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
new_mem_used = kSmallCacheCapacity / 2; // 2 dummy entries
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation after encountering cache "
"reservation failure due to full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation decrease correctly after "
"encountering cache reservation due to full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry)
<< "Failed to release underlying dummy entries correctly on cache "
"reservation decrease after encountering cache resevation failure due "
"to full cache";
EXPECT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to release underlying dummy entries correctly on cache "
"reservation decrease after encountering cache resevation failure due "
"to full cache";
// Create cache full again for subsequent tests
new_mem_used = kSmallCacheCapacity + 1;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::Incomplete())
<< "Failed to return status to indicate failure of dummy entry insertion "
"during cache reservation on full cache";
EXPECT_GE(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep correctly before cache resevation failure happens "
"due to full cache";
EXPECT_LE(test_cache_rev_mng->GetTotalReservedCacheSize(),
kSmallCacheCapacity)
<< "Failed to bookkeep correctly (i.e, bookkeep only successful dummy "
"entry insertions) when encountering cache resevation failure due to "
"full cache";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
EXPECT_LE(cache->GetPinnedUsage(), kSmallCacheCapacity)
<< "Failed to insert underlying dummy entries correctly when "
"encountering cache resevation failure due to full cache";
// Increase cache capacity so the previously failed insertion can fully
// succeed
cache->SetCapacity(kBigCacheCapacity);
new_mem_used = kSmallCacheCapacity + 1;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to increase cache reservation after increasing cache capacity "
"and mitigating cache full error";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
5 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation increase correctly after "
"increasing cache capacity and mitigating cache full error";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 5 * kSizeDummyEntry)
<< "Failed to insert underlying dummy entries correctly after increasing "
"cache capacity and mitigating cache full error";
EXPECT_LT(cache->GetPinnedUsage(),
5 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to insert underlying dummy entries correctly after increasing "
"cache capacity and mitigating cache full error";
}
TEST_F(CacheReservationManagerTest,
DecreaseCacheReservationByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
ASSERT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead);
new_mem_used = 1 * kSizeDummyEntry;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation decrease correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to decrease underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to decrease underlying dummy entries in cache correctly";
}
TEST_F(CacheReservationManagerTest,
DecreaseCacheReservationNotByMultiplesOfDummyEntrySize) {
std::size_t new_mem_used = 2 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
2 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
ASSERT_GE(cache->GetPinnedUsage(), 2 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
2 * kSizeDummyEntry + kMetaDataChargeOverhead);
new_mem_used = kSizeDummyEntry / 2;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
1 * kSizeDummyEntry)
<< "Failed to bookkeep cache reservation decrease correctly";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry)
<< "Failed to decrease underlying dummy entries in cache correctly";
EXPECT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to decrease underlying dummy entries in cache correctly";
}
TEST(CacheReservationManagerWithDelayedDecreaseTest,
DecreaseCacheReservationWithDelayedDecrease) {
constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kCacheCapacity;
lo.num_shard_bits = 0;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache, true /* delayed_decrease */);
std::size_t new_mem_used = 8 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
8 * kSizeDummyEntry);
ASSERT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used);
std::size_t initial_pinned_usage = cache->GetPinnedUsage();
ASSERT_GE(initial_pinned_usage, 8 * kSizeDummyEntry);
ASSERT_LT(initial_pinned_usage,
8 * kSizeDummyEntry + kMetaDataChargeOverhead);
new_mem_used = 6 * kSizeDummyEntry;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK()) << "Failed to delay decreasing cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
8 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when delaying cache reservation "
"decrease";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
<< "Failed to delay decreasing underlying dummy entries in cache";
new_mem_used = 7 * kSizeDummyEntry;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK()) << "Failed to delay decreasing cache reservation";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
8 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when delaying cache reservation "
"decrease";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_EQ(cache->GetPinnedUsage(), initial_pinned_usage)
<< "Failed to delay decreasing underlying dummy entries in cache";
new_mem_used = 6 * kSizeDummyEntry - 1;
s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
EXPECT_EQ(s, Status::OK())
<< "Failed to decrease cache reservation correctly when new_mem_used < "
"GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(),
6 * kSizeDummyEntry)
<< "Failed to bookkeep correctly when new_mem_used < "
"GetTotalReservedCacheSize() * 3 / 4 on delayed decrease mode";
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), new_mem_used)
<< "Failed to bookkeep the used memory correctly";
EXPECT_GE(cache->GetPinnedUsage(), 6 * kSizeDummyEntry)
<< "Failed to decrease underlying dummy entries in cache when "
"new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
"decrease mode";
EXPECT_LT(cache->GetPinnedUsage(),
6 * kSizeDummyEntry + kMetaDataChargeOverhead)
<< "Failed to decrease underlying dummy entries in cache when "
"new_mem_used < GetTotalReservedCacheSize() * 3 / 4 on delayed "
"decrease mode";
}
TEST(CacheReservationManagerDestructorTest,
ReleaseRemainingDummyEntriesOnDestruction) {
constexpr std::size_t kSizeDummyEntry =
CacheReservationManagerImpl<CacheEntryRole::kMisc>::GetDummyEntrySize();
constexpr std::size_t kCacheCapacity = 4096 * kSizeDummyEntry;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kCacheCapacity;
lo.num_shard_bits = 0;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
{
std::shared_ptr<CacheReservationManager> test_cache_rev_mng =
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache);
std::size_t new_mem_used = 1 * kSizeDummyEntry;
Status s = test_cache_rev_mng->UpdateCacheReservation(new_mem_used);
ASSERT_EQ(s, Status::OK());
ASSERT_GE(cache->GetPinnedUsage(), 1 * kSizeDummyEntry);
ASSERT_LT(cache->GetPinnedUsage(),
1 * kSizeDummyEntry + kMetaDataChargeOverhead);
}
EXPECT_EQ(cache->GetPinnedUsage(), 0 * kSizeDummyEntry)
<< "Failed to release remaining underlying dummy entries in cache in "
"CacheReservationManager's destructor";
}
TEST(CacheReservationHandleTest, HandleTest) {
constexpr std::size_t kOneGigabyte = 1024 * 1024 * 1024;
constexpr std::size_t kSizeDummyEntry = 256 * 1024;
constexpr std::size_t kMetaDataChargeOverhead = 10000;
LRUCacheOptions lo;
lo.capacity = kOneGigabyte;
lo.num_shard_bits = 0;
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::shared_ptr<CacheReservationManager> test_cache_rev_mng(
std::make_shared<CacheReservationManagerImpl<CacheEntryRole::kMisc>>(
cache));
std::size_t mem_used = 0;
const std::size_t incremental_mem_used_handle_1 = 1 * kSizeDummyEntry;
const std::size_t incremental_mem_used_handle_2 = 2 * kSizeDummyEntry;
std::unique_ptr<CacheReservationManager::CacheReservationHandle> handle_1,
handle_2;
// To test consecutive CacheReservationManager::MakeCacheReservation works
// correctly in terms of returning the handle as well as updating cache
// reservation and the latest total memory used
Status s = test_cache_rev_mng->MakeCacheReservation(
incremental_mem_used_handle_1, &handle_1);
mem_used = mem_used + incremental_mem_used_handle_1;
ASSERT_EQ(s, Status::OK());
EXPECT_TRUE(handle_1 != nullptr);
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
s = test_cache_rev_mng->MakeCacheReservation(incremental_mem_used_handle_2,
&handle_2);
mem_used = mem_used + incremental_mem_used_handle_2;
ASSERT_EQ(s, Status::OK());
EXPECT_TRUE(handle_2 != nullptr);
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
// To test
// CacheReservationManager::CacheReservationHandle::~CacheReservationHandle()
// works correctly in releasing the cache reserved for the handle
handle_1.reset();
EXPECT_TRUE(handle_1 == nullptr);
mem_used = mem_used - incremental_mem_used_handle_1;
EXPECT_EQ(test_cache_rev_mng->GetTotalReservedCacheSize(), mem_used);
EXPECT_EQ(test_cache_rev_mng->GetTotalMemoryUsed(), mem_used);
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
// To test the actual CacheReservationManager object won't be deallocated
// as long as there remain handles pointing to it.
// We strongly recommend deallocating CacheReservationManager object only
// after all its handles are deallocated to keep things easy to reasonate
test_cache_rev_mng.reset();
EXPECT_GE(cache->GetPinnedUsage(), mem_used);
EXPECT_LT(cache->GetPinnedUsage(), mem_used + kMetaDataChargeOverhead);
handle_2.reset();
// The CacheReservationManager object is now deallocated since all the handles
// and its original pointer is gone
mem_used = mem_used - incremental_mem_used_handle_2;
EXPECT_EQ(mem_used, 0);
EXPECT_EQ(cache->GetPinnedUsage(), mem_used);
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

864
cache/cache_test.cc vendored
View File

@ -1,864 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "rocksdb/cache.h"
#include <forward_list>
#include <functional>
#include <iostream>
#include <string>
#include <vector>
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "cache/lru_cache.h"
#include "test_util/testharness.h"
#include "util/coding.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
// Conversions between numeric keys/values and the types expected by Cache.
static std::string EncodeKey(int k) {
std::string result;
PutFixed32(&result, k);
return result;
}
static int DecodeKey(const Slice& k) {
assert(k.size() == 4);
return DecodeFixed32(k.data());
}
static void* EncodeValue(uintptr_t v) { return reinterpret_cast<void*>(v); }
static int DecodeValue(void* v) {
return static_cast<int>(reinterpret_cast<uintptr_t>(v));
}
const std::string kLRU = "lru";
const std::string kClock = "clock";
const std::string kFast = "fast";
void dumbDeleter(const Slice& /*key*/, void* /*value*/) {}
void eraseDeleter(const Slice& /*key*/, void* value) {
Cache* cache = reinterpret_cast<Cache*>(value);
cache->Erase("foo");
}
class CacheTest : public testing::TestWithParam<std::string> {
public:
static CacheTest* current_;
static void Deleter(const Slice& key, void* v) {
current_->deleted_keys_.push_back(DecodeKey(key));
current_->deleted_values_.push_back(DecodeValue(v));
}
static const int kCacheSize = 1000;
static const int kNumShardBits = 4;
static const int kCacheSize2 = 100;
static const int kNumShardBits2 = 2;
std::vector<int> deleted_keys_;
std::vector<int> deleted_values_;
std::shared_ptr<Cache> cache_;
std::shared_ptr<Cache> cache2_;
CacheTest()
: cache_(NewCache(kCacheSize, kNumShardBits, false)),
cache2_(NewCache(kCacheSize2, kNumShardBits2, false)) {
current_ = this;
}
~CacheTest() override {}
std::shared_ptr<Cache> NewCache(size_t capacity) {
auto type = GetParam();
if (type == kLRU) {
return NewLRUCache(capacity);
}
if (type == kClock) {
return NewClockCache(capacity);
}
if (type == kFast) {
return NewFastLRUCache(capacity);
}
return nullptr;
}
std::shared_ptr<Cache> NewCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy charge_policy = kDontChargeCacheMetadata) {
auto type = GetParam();
if (type == kLRU) {
LRUCacheOptions co;
co.capacity = capacity;
co.num_shard_bits = num_shard_bits;
co.strict_capacity_limit = strict_capacity_limit;
co.high_pri_pool_ratio = 0;
co.metadata_charge_policy = charge_policy;
return NewLRUCache(co);
}
if (type == kClock) {
return NewClockCache(capacity, num_shard_bits, strict_capacity_limit,
charge_policy);
}
if (type == kFast) {
return NewFastLRUCache(capacity, num_shard_bits, strict_capacity_limit,
charge_policy);
}
return nullptr;
}
int Lookup(std::shared_ptr<Cache> cache, int key) {
Cache::Handle* handle = cache->Lookup(EncodeKey(key));
const int r = (handle == nullptr) ? -1 : DecodeValue(cache->Value(handle));
if (handle != nullptr) {
cache->Release(handle);
}
return r;
}
void Insert(std::shared_ptr<Cache> cache, int key, int value,
int charge = 1) {
EXPECT_OK(cache->Insert(EncodeKey(key), EncodeValue(value), charge,
&CacheTest::Deleter));
}
void Erase(std::shared_ptr<Cache> cache, int key) {
cache->Erase(EncodeKey(key));
}
int Lookup(int key) {
return Lookup(cache_, key);
}
void Insert(int key, int value, int charge = 1) {
Insert(cache_, key, value, charge);
}
void Erase(int key) {
Erase(cache_, key);
}
int Lookup2(int key) {
return Lookup(cache2_, key);
}
void Insert2(int key, int value, int charge = 1) {
Insert(cache2_, key, value, charge);
}
void Erase2(int key) {
Erase(cache2_, key);
}
};
CacheTest* CacheTest::current_;
class LRUCacheTest : public CacheTest {};
TEST_P(CacheTest, UsageTest) {
// cache is std::shared_ptr and will be automatically cleaned up.
const uint64_t kCapacity = 100000;
auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata);
ASSERT_EQ(0, cache->GetUsage());
ASSERT_EQ(0, precise_cache->GetUsage());
size_t usage = 0;
char value[10] = "abcdef";
// make sure everything will be cached
for (int i = 1; i < 100; ++i) {
std::string key(i, 'a');
auto kv_size = key.size() + 5;
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
dumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
kv_size, dumbDeleter));
usage += kv_size;
ASSERT_EQ(usage, cache->GetUsage());
ASSERT_LT(usage, precise_cache->GetUsage());
}
cache->EraseUnRefEntries();
precise_cache->EraseUnRefEntries();
ASSERT_EQ(0, cache->GetUsage());
ASSERT_EQ(0, precise_cache->GetUsage());
// make sure the cache will be overloaded
for (uint64_t i = 1; i < kCapacity; ++i) {
auto key = std::to_string(i);
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
key.size() + 5, dumbDeleter));
}
// the usage should be close to the capacity
ASSERT_GT(kCapacity, cache->GetUsage());
ASSERT_GT(kCapacity, precise_cache->GetUsage());
ASSERT_LT(kCapacity * 0.95, cache->GetUsage());
ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage());
}
TEST_P(CacheTest, PinnedUsageTest) {
// cache is std::shared_ptr and will be automatically cleaned up.
const uint64_t kCapacity = 200000;
auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata);
size_t pinned_usage = 0;
char value[10] = "abcdef";
std::forward_list<Cache::Handle*> unreleased_handles;
std::forward_list<Cache::Handle*> unreleased_handles_in_precise_cache;
// Add entries. Unpin some of them after insertion. Then, pin some of them
// again. Check GetPinnedUsage().
for (int i = 1; i < 100; ++i) {
std::string key(i, 'a');
auto kv_size = key.size() + 5;
Cache::Handle* handle;
Cache::Handle* handle_in_precise_cache;
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
dumbDeleter, &handle));
assert(handle);
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
kv_size, dumbDeleter,
&handle_in_precise_cache));
assert(handle_in_precise_cache);
pinned_usage += kv_size;
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage());
if (i % 2 == 0) {
cache->Release(handle);
precise_cache->Release(handle_in_precise_cache);
pinned_usage -= kv_size;
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage());
} else {
unreleased_handles.push_front(handle);
unreleased_handles_in_precise_cache.push_front(handle_in_precise_cache);
}
if (i % 3 == 0) {
unreleased_handles.push_front(cache->Lookup(key));
auto x = precise_cache->Lookup(key);
assert(x);
unreleased_handles_in_precise_cache.push_front(x);
// If i % 2 == 0, then the entry was unpinned before Lookup, so pinned
// usage increased
if (i % 2 == 0) {
pinned_usage += kv_size;
}
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage());
}
}
auto precise_cache_pinned_usage = precise_cache->GetPinnedUsage();
ASSERT_LT(pinned_usage, precise_cache_pinned_usage);
// check that overloading the cache does not change the pinned usage
for (uint64_t i = 1; i < 2 * kCapacity; ++i) {
auto key = std::to_string(i);
ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter));
ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
key.size() + 5, dumbDeleter));
}
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage());
cache->EraseUnRefEntries();
precise_cache->EraseUnRefEntries();
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage());
// release handles for pinned entries to prevent memory leaks
for (auto handle : unreleased_handles) {
cache->Release(handle);
}
for (auto handle : unreleased_handles_in_precise_cache) {
precise_cache->Release(handle);
}
ASSERT_EQ(0, cache->GetPinnedUsage());
ASSERT_EQ(0, precise_cache->GetPinnedUsage());
cache->EraseUnRefEntries();
precise_cache->EraseUnRefEntries();
ASSERT_EQ(0, cache->GetUsage());
ASSERT_EQ(0, precise_cache->GetUsage());
}
TEST_P(CacheTest, HitAndMiss) {
ASSERT_EQ(-1, Lookup(100));
Insert(100, 101);
ASSERT_EQ(101, Lookup(100));
ASSERT_EQ(-1, Lookup(200));
ASSERT_EQ(-1, Lookup(300));
Insert(200, 201);
ASSERT_EQ(101, Lookup(100));
ASSERT_EQ(201, Lookup(200));
ASSERT_EQ(-1, Lookup(300));
Insert(100, 102);
ASSERT_EQ(102, Lookup(100));
ASSERT_EQ(201, Lookup(200));
ASSERT_EQ(-1, Lookup(300));
ASSERT_EQ(1U, deleted_keys_.size());
ASSERT_EQ(100, deleted_keys_[0]);
ASSERT_EQ(101, deleted_values_[0]);
}
TEST_P(CacheTest, InsertSameKey) {
Insert(1, 1);
Insert(1, 2);
ASSERT_EQ(2, Lookup(1));
}
TEST_P(CacheTest, Erase) {
Erase(200);
ASSERT_EQ(0U, deleted_keys_.size());
Insert(100, 101);
Insert(200, 201);
Erase(100);
ASSERT_EQ(-1, Lookup(100));
ASSERT_EQ(201, Lookup(200));
ASSERT_EQ(1U, deleted_keys_.size());
ASSERT_EQ(100, deleted_keys_[0]);
ASSERT_EQ(101, deleted_values_[0]);
Erase(100);
ASSERT_EQ(-1, Lookup(100));
ASSERT_EQ(201, Lookup(200));
ASSERT_EQ(1U, deleted_keys_.size());
}
TEST_P(CacheTest, EntriesArePinned) {
Insert(100, 101);
Cache::Handle* h1 = cache_->Lookup(EncodeKey(100));
ASSERT_EQ(101, DecodeValue(cache_->Value(h1)));
ASSERT_EQ(1U, cache_->GetUsage());
Insert(100, 102);
Cache::Handle* h2 = cache_->Lookup(EncodeKey(100));
ASSERT_EQ(102, DecodeValue(cache_->Value(h2)));
ASSERT_EQ(0U, deleted_keys_.size());
ASSERT_EQ(2U, cache_->GetUsage());
cache_->Release(h1);
ASSERT_EQ(1U, deleted_keys_.size());
ASSERT_EQ(100, deleted_keys_[0]);
ASSERT_EQ(101, deleted_values_[0]);
ASSERT_EQ(1U, cache_->GetUsage());
Erase(100);
ASSERT_EQ(-1, Lookup(100));
ASSERT_EQ(1U, deleted_keys_.size());
ASSERT_EQ(1U, cache_->GetUsage());
cache_->Release(h2);
ASSERT_EQ(2U, deleted_keys_.size());
ASSERT_EQ(100, deleted_keys_[1]);
ASSERT_EQ(102, deleted_values_[1]);
ASSERT_EQ(0U, cache_->GetUsage());
}
TEST_P(CacheTest, EvictionPolicy) {
Insert(100, 101);
Insert(200, 201);
// Frequently used entry must be kept around
for (int i = 0; i < kCacheSize * 2; i++) {
Insert(1000+i, 2000+i);
ASSERT_EQ(101, Lookup(100));
}
ASSERT_EQ(101, Lookup(100));
ASSERT_EQ(-1, Lookup(200));
}
TEST_P(CacheTest, ExternalRefPinsEntries) {
Insert(100, 101);
Cache::Handle* h = cache_->Lookup(EncodeKey(100));
ASSERT_TRUE(cache_->Ref(h));
ASSERT_EQ(101, DecodeValue(cache_->Value(h)));
ASSERT_EQ(1U, cache_->GetUsage());
for (int i = 0; i < 3; ++i) {
if (i > 0) {
// First release (i == 1) corresponds to Ref(), second release (i == 2)
// corresponds to Lookup(). Then, since all external refs are released,
// the below insertions should push out the cache entry.
cache_->Release(h);
}
// double cache size because the usage bit in block cache prevents 100 from
// being evicted in the first kCacheSize iterations
for (int j = 0; j < 2 * kCacheSize + 100; j++) {
Insert(1000 + j, 2000 + j);
}
if (i < 2) {
ASSERT_EQ(101, Lookup(100));
}
}
ASSERT_EQ(-1, Lookup(100));
}
TEST_P(CacheTest, EvictionPolicyRef) {
Insert(100, 101);
Insert(101, 102);
Insert(102, 103);
Insert(103, 104);
Insert(200, 101);
Insert(201, 102);
Insert(202, 103);
Insert(203, 104);
Cache::Handle* h201 = cache_->Lookup(EncodeKey(200));
Cache::Handle* h202 = cache_->Lookup(EncodeKey(201));
Cache::Handle* h203 = cache_->Lookup(EncodeKey(202));
Cache::Handle* h204 = cache_->Lookup(EncodeKey(203));
Insert(300, 101);
Insert(301, 102);
Insert(302, 103);
Insert(303, 104);
// Insert entries much more than Cache capacity
for (int i = 0; i < kCacheSize * 2; i++) {
Insert(1000 + i, 2000 + i);
}
// Check whether the entries inserted in the beginning
// are evicted. Ones without extra ref are evicted and
// those with are not.
ASSERT_EQ(-1, Lookup(100));
ASSERT_EQ(-1, Lookup(101));
ASSERT_EQ(-1, Lookup(102));
ASSERT_EQ(-1, Lookup(103));
ASSERT_EQ(-1, Lookup(300));
ASSERT_EQ(-1, Lookup(301));
ASSERT_EQ(-1, Lookup(302));
ASSERT_EQ(-1, Lookup(303));
ASSERT_EQ(101, Lookup(200));
ASSERT_EQ(102, Lookup(201));
ASSERT_EQ(103, Lookup(202));
ASSERT_EQ(104, Lookup(203));
// Cleaning up all the handles
cache_->Release(h201);
cache_->Release(h202);
cache_->Release(h203);
cache_->Release(h204);
}
TEST_P(CacheTest, EvictEmptyCache) {
// Insert item large than capacity to trigger eviction on empty cache.
auto cache = NewCache(1, 0, false);
ASSERT_OK(cache->Insert("foo", nullptr, 10, dumbDeleter));
}
TEST_P(CacheTest, EraseFromDeleter) {
// Have deleter which will erase item from cache, which will re-enter
// the cache at that point.
std::shared_ptr<Cache> cache = NewCache(10, 0, false);
ASSERT_OK(cache->Insert("foo", nullptr, 1, dumbDeleter));
ASSERT_OK(cache->Insert("bar", cache.get(), 1, eraseDeleter));
cache->Erase("bar");
ASSERT_EQ(nullptr, cache->Lookup("foo"));
ASSERT_EQ(nullptr, cache->Lookup("bar"));
}
TEST_P(CacheTest, ErasedHandleState) {
// insert a key and get two handles
Insert(100, 1000);
Cache::Handle* h1 = cache_->Lookup(EncodeKey(100));
Cache::Handle* h2 = cache_->Lookup(EncodeKey(100));
ASSERT_EQ(h1, h2);
ASSERT_EQ(DecodeValue(cache_->Value(h1)), 1000);
ASSERT_EQ(DecodeValue(cache_->Value(h2)), 1000);
// delete the key from the cache
Erase(100);
// can no longer find in the cache
ASSERT_EQ(-1, Lookup(100));
// release one handle
cache_->Release(h1);
// still can't find in cache
ASSERT_EQ(-1, Lookup(100));
cache_->Release(h2);
}
TEST_P(CacheTest, HeavyEntries) {
// Add a bunch of light and heavy entries and then count the combined
// size of items still in the cache, which must be approximately the
// same as the total capacity.
const int kLight = 1;
const int kHeavy = 10;
int added = 0;
int index = 0;
while (added < 2*kCacheSize) {
const int weight = (index & 1) ? kLight : kHeavy;
Insert(index, 1000+index, weight);
added += weight;
index++;
}
int cached_weight = 0;
for (int i = 0; i < index; i++) {
const int weight = (i & 1 ? kLight : kHeavy);
int r = Lookup(i);
if (r >= 0) {
cached_weight += weight;
ASSERT_EQ(1000+i, r);
}
}
ASSERT_LE(cached_weight, kCacheSize + kCacheSize/10);
}
TEST_P(CacheTest, NewId) {
uint64_t a = cache_->NewId();
uint64_t b = cache_->NewId();
ASSERT_NE(a, b);
}
class Value {
public:
explicit Value(size_t v) : v_(v) { }
size_t v_;
};
namespace {
void deleter(const Slice& /*key*/, void* value) {
delete static_cast<Value *>(value);
}
} // namespace
TEST_P(CacheTest, ReleaseAndErase) {
std::shared_ptr<Cache> cache = NewCache(5, 0, false);
Cache::Handle* handle;
Status s = cache->Insert(EncodeKey(100), EncodeValue(100), 1,
&CacheTest::Deleter, &handle);
ASSERT_TRUE(s.ok());
ASSERT_EQ(5U, cache->GetCapacity());
ASSERT_EQ(1U, cache->GetUsage());
ASSERT_EQ(0U, deleted_keys_.size());
auto erased = cache->Release(handle, true);
ASSERT_TRUE(erased);
// This tests that deleter has been called
ASSERT_EQ(1U, deleted_keys_.size());
}
TEST_P(CacheTest, ReleaseWithoutErase) {
std::shared_ptr<Cache> cache = NewCache(5, 0, false);
Cache::Handle* handle;
Status s = cache->Insert(EncodeKey(100), EncodeValue(100), 1,
&CacheTest::Deleter, &handle);
ASSERT_TRUE(s.ok());
ASSERT_EQ(5U, cache->GetCapacity());
ASSERT_EQ(1U, cache->GetUsage());
ASSERT_EQ(0U, deleted_keys_.size());
auto erased = cache->Release(handle);
ASSERT_FALSE(erased);
// This tests that deleter is not called. When cache has free capacity it is
// not expected to immediately erase the released items.
ASSERT_EQ(0U, deleted_keys_.size());
}
TEST_P(CacheTest, SetCapacity) {
// test1: increase capacity
// lets create a cache with capacity 5,
// then, insert 5 elements, then increase capacity
// to 10, returned capacity should be 10, usage=5
std::shared_ptr<Cache> cache = NewCache(5, 0, false);
std::vector<Cache::Handle*> handles(10);
// Insert 5 entries, but not releasing.
for (size_t i = 0; i < 5; i++) {
std::string key = std::to_string(i + 1);
Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_TRUE(s.ok());
}
ASSERT_EQ(5U, cache->GetCapacity());
ASSERT_EQ(5U, cache->GetUsage());
cache->SetCapacity(10);
ASSERT_EQ(10U, cache->GetCapacity());
ASSERT_EQ(5U, cache->GetUsage());
// test2: decrease capacity
// insert 5 more elements to cache, then release 5,
// then decrease capacity to 7, final capacity should be 7
// and usage should be 7
for (size_t i = 5; i < 10; i++) {
std::string key = std::to_string(i + 1);
Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_TRUE(s.ok());
}
ASSERT_EQ(10U, cache->GetCapacity());
ASSERT_EQ(10U, cache->GetUsage());
for (size_t i = 0; i < 5; i++) {
cache->Release(handles[i]);
}
ASSERT_EQ(10U, cache->GetCapacity());
ASSERT_EQ(10U, cache->GetUsage());
cache->SetCapacity(7);
ASSERT_EQ(7, cache->GetCapacity());
ASSERT_EQ(7, cache->GetUsage());
// release remaining 5 to keep valgrind happy
for (size_t i = 5; i < 10; i++) {
cache->Release(handles[i]);
}
// Make sure this doesn't crash or upset ASAN/valgrind
cache->DisownData();
}
TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
// test1: set the flag to false. Insert more keys than capacity. See if they
// all go through.
std::shared_ptr<Cache> cache = NewCache(5, 0, false);
std::vector<Cache::Handle*> handles(10);
Status s;
for (size_t i = 0; i < 10; i++) {
std::string key = std::to_string(i + 1);
s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_OK(s);
ASSERT_NE(nullptr, handles[i]);
}
ASSERT_EQ(10, cache->GetUsage());
// test2: set the flag to true. Insert and check if it fails.
std::string extra_key = "extra";
Value* extra_value = new Value(0);
cache->SetStrictCapacityLimit(true);
Cache::Handle* handle;
s = cache->Insert(extra_key, extra_value, 1, &deleter, &handle);
ASSERT_TRUE(s.IsIncomplete());
ASSERT_EQ(nullptr, handle);
ASSERT_EQ(10, cache->GetUsage());
for (size_t i = 0; i < 10; i++) {
cache->Release(handles[i]);
}
// test3: init with flag being true.
std::shared_ptr<Cache> cache2 = NewCache(5, 0, true);
for (size_t i = 0; i < 5; i++) {
std::string key = std::to_string(i + 1);
s = cache2->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_OK(s);
ASSERT_NE(nullptr, handles[i]);
}
s = cache2->Insert(extra_key, extra_value, 1, &deleter, &handle);
ASSERT_TRUE(s.IsIncomplete());
ASSERT_EQ(nullptr, handle);
// test insert without handle
s = cache2->Insert(extra_key, extra_value, 1, &deleter);
// AS if the key have been inserted into cache but get evicted immediately.
ASSERT_OK(s);
ASSERT_EQ(5, cache2->GetUsage());
ASSERT_EQ(nullptr, cache2->Lookup(extra_key));
for (size_t i = 0; i < 5; i++) {
cache2->Release(handles[i]);
}
}
TEST_P(CacheTest, OverCapacity) {
size_t n = 10;
// a LRUCache with n entries and one shard only
std::shared_ptr<Cache> cache = NewCache(n, 0, false);
std::vector<Cache::Handle*> handles(n+1);
// Insert n+1 entries, but not releasing.
for (size_t i = 0; i < n + 1; i++) {
std::string key = std::to_string(i + 1);
Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
ASSERT_TRUE(s.ok());
}
// Guess what's in the cache now?
for (size_t i = 0; i < n + 1; i++) {
std::string key = std::to_string(i + 1);
auto h = cache->Lookup(key);
ASSERT_TRUE(h != nullptr);
if (h) cache->Release(h);
}
// the cache is over capacity since nothing could be evicted
ASSERT_EQ(n + 1U, cache->GetUsage());
for (size_t i = 0; i < n + 1; i++) {
cache->Release(handles[i]);
}
// Make sure eviction is triggered.
cache->SetCapacity(n);
// cache is under capacity now since elements were released
ASSERT_EQ(n, cache->GetUsage());
// element 0 is evicted and the rest is there
// This is consistent with the LRU policy since the element 0
// was released first
for (size_t i = 0; i < n + 1; i++) {
std::string key = std::to_string(i + 1);
auto h = cache->Lookup(key);
if (h) {
ASSERT_NE(i, 0U);
cache->Release(h);
} else {
ASSERT_EQ(i, 0U);
}
}
}
namespace {
std::vector<std::pair<int, int>> legacy_callback_state;
void legacy_callback(void* value, size_t charge) {
legacy_callback_state.push_back(
{DecodeValue(value), static_cast<int>(charge)});
}
};
TEST_P(CacheTest, ApplyToAllCacheEntriesTest) {
std::vector<std::pair<int, int>> inserted;
legacy_callback_state.clear();
for (int i = 0; i < 10; ++i) {
Insert(i, i * 2, i + 1);
inserted.push_back({i * 2, i + 1});
}
cache_->ApplyToAllCacheEntries(legacy_callback, true);
std::sort(inserted.begin(), inserted.end());
std::sort(legacy_callback_state.begin(), legacy_callback_state.end());
ASSERT_EQ(inserted.size(), legacy_callback_state.size());
for (size_t i = 0; i < inserted.size(); ++i) {
EXPECT_EQ(inserted[i], legacy_callback_state[i]);
}
}
TEST_P(CacheTest, ApplyToAllEntriesTest) {
std::vector<std::string> callback_state;
const auto callback = [&](const Slice& key, void* value, size_t charge,
Cache::DeleterFn deleter) {
callback_state.push_back(std::to_string(DecodeKey(key)) + "," +
std::to_string(DecodeValue(value)) + "," +
std::to_string(charge));
assert(deleter == &CacheTest::Deleter);
};
std::vector<std::string> inserted;
callback_state.clear();
for (int i = 0; i < 10; ++i) {
Insert(i, i * 2, i + 1);
inserted.push_back(std::to_string(i) + "," + std::to_string(i * 2) + "," +
std::to_string(i + 1));
}
cache_->ApplyToAllEntries(callback, /*opts*/ {});
std::sort(inserted.begin(), inserted.end());
std::sort(callback_state.begin(), callback_state.end());
ASSERT_EQ(inserted.size(), callback_state.size());
for (size_t i = 0; i < inserted.size(); ++i) {
EXPECT_EQ(inserted[i], callback_state[i]);
}
}
TEST_P(CacheTest, ApplyToAllEntriesDuringResize) {
// This is a mini-stress test of ApplyToAllEntries, to ensure
// items in the cache that are neither added nor removed
// during ApplyToAllEntries are counted exactly once.
// Insert some entries that we expect to be seen exactly once
// during iteration.
constexpr int kSpecialCharge = 2;
constexpr int kNotSpecialCharge = 1;
constexpr int kSpecialCount = 100;
for (int i = 0; i < kSpecialCount; ++i) {
Insert(i, i * 2, kSpecialCharge);
}
// For callback
int special_count = 0;
const auto callback = [&](const Slice&, void*, size_t charge,
Cache::DeleterFn) {
if (charge == static_cast<size_t>(kSpecialCharge)) {
++special_count;
}
};
// Start counting
std::thread apply_thread([&]() {
// Use small average_entries_per_lock to make the problem difficult
Cache::ApplyToAllEntriesOptions opts;
opts.average_entries_per_lock = 2;
cache_->ApplyToAllEntries(callback, opts);
});
// In parallel, add more entries, enough to cause resize but not enough
// to cause ejections
for (int i = kSpecialCount * 1; i < kSpecialCount * 6; ++i) {
Insert(i, i * 2, kNotSpecialCharge);
}
apply_thread.join();
ASSERT_EQ(special_count, kSpecialCount);
}
TEST_P(CacheTest, DefaultShardBits) {
// test1: set the flag to false. Insert more keys than capacity. See if they
// all go through.
std::shared_ptr<Cache> cache = NewCache(16 * 1024L * 1024L);
ShardedCache* sc = dynamic_cast<ShardedCache*>(cache.get());
ASSERT_EQ(5, sc->GetNumShardBits());
cache = NewLRUCache(511 * 1024L, -1, true);
sc = dynamic_cast<ShardedCache*>(cache.get());
ASSERT_EQ(0, sc->GetNumShardBits());
cache = NewLRUCache(1024L * 1024L * 1024L, -1, true);
sc = dynamic_cast<ShardedCache*>(cache.get());
ASSERT_EQ(6, sc->GetNumShardBits());
}
TEST_P(CacheTest, GetChargeAndDeleter) {
Insert(1, 2);
Cache::Handle* h1 = cache_->Lookup(EncodeKey(1));
ASSERT_EQ(2, DecodeValue(cache_->Value(h1)));
ASSERT_EQ(1, cache_->GetCharge(h1));
ASSERT_EQ(&CacheTest::Deleter, cache_->GetDeleter(h1));
cache_->Release(h1);
}
#ifdef SUPPORT_CLOCK_CACHE
std::shared_ptr<Cache> (*new_clock_cache_func)(
size_t, int, bool, CacheMetadataChargePolicy) = NewClockCache;
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
testing::Values(kLRU, kClock, kFast));
#else
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
testing::Values(kLRU, kFast));
#endif // SUPPORT_CLOCK_CACHE
INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest,
testing::Values(kLRU, kFast));
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

839
cache/clock_cache.cc vendored
View File

@ -1,839 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/clock_cache.h"
#ifndef SUPPORT_CLOCK_CACHE
namespace ROCKSDB_NAMESPACE {
std::shared_ptr<Cache> NewClockCache(
size_t /*capacity*/, int /*num_shard_bits*/, bool /*strict_capacity_limit*/,
CacheMetadataChargePolicy /*metadata_charge_policy*/) {
// Clock cache not supported.
return nullptr;
}
} // namespace ROCKSDB_NAMESPACE
#else
#include <assert.h>
#include <atomic>
#include <deque>
// "tbb/concurrent_hash_map.h" requires RTTI if exception is enabled.
// Disable it so users can chooose to disable RTTI.
#ifndef ROCKSDB_USE_RTTI
#define TBB_USE_EXCEPTIONS 0
#endif
#include "cache/sharded_cache.h"
#include "port/lang.h"
#include "port/malloc.h"
#include "port/port.h"
#include "tbb/concurrent_hash_map.h"
#include "util/autovector.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
namespace {
// An implementation of the Cache interface based on CLOCK algorithm, with
// better concurrent performance than LRUCache. The idea of CLOCK algorithm
// is to maintain all cache entries in a circular list, and an iterator
// (the "head") pointing to the last examined entry. Eviction starts from the
// current head. Each entry is given a second chance before eviction, if it
// has been access since last examine. In contrast to LRU, no modification
// to the internal data-structure (except for flipping the usage bit) needs
// to be done upon lookup. This gives us oppertunity to implement a cache
// with better concurrency.
//
// Each cache entry is represented by a cache handle, and all the handles
// are arranged in a circular list, as describe above. Upon erase of an entry,
// we never remove the handle. Instead, the handle is put into a recycle bin
// to be re-use. This is to avoid memory dealocation, which is hard to deal
// with in concurrent environment.
//
// The cache also maintains a concurrent hash map for lookup. Any concurrent
// hash map implementation should do the work. We currently use
// tbb::concurrent_hash_map because it supports concurrent erase.
//
// Each cache handle has the following flags and counters, which are squeeze
// in an atomic interger, to make sure the handle always be in a consistent
// state:
//
// * In-cache bit: whether the entry is reference by the cache itself. If
// an entry is in cache, its key would also be available in the hash map.
// * Usage bit: whether the entry has been access by user since last
// examine for eviction. Can be reset by eviction.
// * Reference count: reference count by user.
//
// An entry can be reference only when it's in cache. An entry can be evicted
// only when it is in cache, has no usage since last examine, and reference
// count is zero.
//
// The follow figure shows a possible layout of the cache. Boxes represents
// cache handles and numbers in each box being in-cache bit, usage bit and
// reference count respectively.
//
// hash map:
// +-------+--------+
// | key | handle |
// +-------+--------+
// | "foo" | 5 |-------------------------------------+
// +-------+--------+ |
// | "bar" | 2 |--+ |
// +-------+--------+ | |
// | |
// head | |
// | | |
// circular list: | | |
// +-------+ +-------+ +-------+ +-------+ +-------+ +-------
// |(0,0,0)|---|(1,1,0)|---|(0,0,0)|---|(0,1,3)|---|(1,0,0)|---| ...
// +-------+ +-------+ +-------+ +-------+ +-------+ +-------
// | |
// +-------+ +-----------+
// | |
// +---+---+
// recycle bin: | 1 | 3 |
// +---+---+
//
// Suppose we try to insert "baz" into the cache at this point and the cache is
// full. The cache will first look for entries to evict, starting from where
// head points to (the second entry). It resets usage bit of the second entry,
// skips the third and fourth entry since they are not in cache, and finally
// evict the fifth entry ("foo"). It looks at recycle bin for available handle,
// grabs handle 3, and insert the key into the handle. The following figure
// shows the resulting layout.
//
// hash map:
// +-------+--------+
// | key | handle |
// +-------+--------+
// | "baz" | 3 |-------------+
// +-------+--------+ |
// | "bar" | 2 |--+ |
// +-------+--------+ | |
// | |
// | | head
// | | |
// circular list: | | |
// +-------+ +-------+ +-------+ +-------+ +-------+ +-------
// |(0,0,0)|---|(1,0,0)|---|(1,0,0)|---|(0,1,3)|---|(0,0,0)|---| ...
// +-------+ +-------+ +-------+ +-------+ +-------+ +-------
// | |
// +-------+ +-----------------------------------+
// | |
// +---+---+
// recycle bin: | 1 | 5 |
// +---+---+
//
// A global mutex guards the circular list, the head, and the recycle bin.
// We additionally require that modifying the hash map needs to hold the mutex.
// As such, Modifying the cache (such as Insert() and Erase()) require to
// hold the mutex. Lookup() only access the hash map and the flags associated
// with each handle, and don't require explicit locking. Release() has to
// acquire the mutex only when it releases the last reference to the entry and
// the entry has been erased from cache explicitly. A future improvement could
// be to remove the mutex completely.
//
// Benchmark:
// We run readrandom db_bench on a test DB of size 13GB, with size of each
// level:
//
// Level Files Size(MB)
// -------------------------
// L0 1 0.01
// L1 18 17.32
// L2 230 182.94
// L3 1186 1833.63
// L4 4602 8140.30
//
// We test with both 32 and 16 read threads, with 2GB cache size (the whole DB
// doesn't fits in) and 64GB cache size (the whole DB can fit in cache), and
// whether to put index and filter blocks in block cache. The benchmark runs
// with
// with RocksDB 4.10. We got the following result:
//
// Threads Cache Cache ClockCache LRUCache
// Size Index/Filter Throughput(MB/s) Hit Throughput(MB/s) Hit
// 32 2GB yes 466.7 85.9% 433.7 86.5%
// 32 2GB no 529.9 72.7% 532.7 73.9%
// 32 64GB yes 649.9 99.9% 507.9 99.9%
// 32 64GB no 740.4 99.9% 662.8 99.9%
// 16 2GB yes 278.4 85.9% 283.4 86.5%
// 16 2GB no 318.6 72.7% 335.8 73.9%
// 16 64GB yes 391.9 99.9% 353.3 99.9%
// 16 64GB no 433.8 99.8% 419.4 99.8%
// Cache entry meta data.
struct CacheHandle {
Slice key;
void* value;
size_t charge;
Cache::DeleterFn deleter;
uint32_t hash;
// Addition to "charge" to get "total charge" under metadata policy.
uint32_t meta_charge;
// Flags and counters associated with the cache handle:
// lowest bit: in-cache bit
// second lowest bit: usage bit
// the rest bits: reference count
// The handle is unused when flags equals to 0. The thread decreases the count
// to 0 is responsible to put the handle back to recycle_ and cleanup memory.
std::atomic<uint32_t> flags;
CacheHandle() = default;
CacheHandle(const CacheHandle& a) { *this = a; }
CacheHandle(const Slice& k, void* v,
void (*del)(const Slice& key, void* value))
: key(k), value(v), deleter(del) {}
CacheHandle& operator=(const CacheHandle& a) {
// Only copy members needed for deletion.
key = a.key;
value = a.value;
deleter = a.deleter;
return *this;
}
inline static uint32_t CalcMetadataCharge(
Slice key, CacheMetadataChargePolicy metadata_charge_policy) {
size_t meta_charge = 0;
if (metadata_charge_policy == kFullChargeCacheMetadata) {
meta_charge += sizeof(CacheHandle);
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
meta_charge +=
malloc_usable_size(static_cast<void*>(const_cast<char*>(key.data())));
#else
meta_charge += key.size();
#endif
}
assert(meta_charge <= UINT32_MAX);
return static_cast<uint32_t>(meta_charge);
}
inline size_t GetTotalCharge() { return charge + meta_charge; }
};
// Key of hash map. We store hash value with the key for convenience.
struct ClockCacheKey {
Slice key;
uint32_t hash_value;
ClockCacheKey() = default;
ClockCacheKey(const Slice& k, uint32_t h) {
key = k;
hash_value = h;
}
static bool equal(const ClockCacheKey& a, const ClockCacheKey& b) {
return a.hash_value == b.hash_value && a.key == b.key;
}
static size_t hash(const ClockCacheKey& a) {
return static_cast<size_t>(a.hash_value);
}
};
struct CleanupContext {
// List of values to be deleted, along with the key and deleter.
autovector<CacheHandle> to_delete_value;
// List of keys to be deleted.
autovector<const char*> to_delete_key;
};
// A cache shard which maintains its own CLOCK cache.
class ClockCacheShard final : public CacheShard {
public:
// Hash map type.
using HashTable =
tbb::concurrent_hash_map<ClockCacheKey, CacheHandle*, ClockCacheKey>;
ClockCacheShard();
~ClockCacheShard() override;
// Interfaces
void SetCapacity(size_t capacity) override;
void SetStrictCapacityLimit(bool strict_capacity_limit) override;
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
void (*deleter)(const Slice& key, void* value),
Cache::Handle** handle, Cache::Priority priority) override;
Status Insert(const Slice& key, uint32_t hash, void* value,
const Cache::CacheItemHelper* helper, size_t charge,
Cache::Handle** handle, Cache::Priority priority) override {
return Insert(key, hash, value, charge, helper->del_cb, handle, priority);
}
Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
Cache::Handle* Lookup(const Slice& key, uint32_t hash,
const Cache::CacheItemHelper* /*helper*/,
const Cache::CreateCallback& /*create_cb*/,
Cache::Priority /*priority*/, bool /*wait*/,
Statistics* /*stats*/) override {
return Lookup(key, hash);
}
bool Release(Cache::Handle* handle, bool /*useful*/,
bool erase_if_last_ref) override {
return Release(handle, erase_if_last_ref);
}
bool IsReady(Cache::Handle* /*handle*/) override { return true; }
void Wait(Cache::Handle* /*handle*/) override {}
// If the entry in in cache, increase reference count and return true.
// Return false otherwise.
//
// Not necessary to hold mutex_ before being called.
bool Ref(Cache::Handle* handle) override;
bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
void Erase(const Slice& key, uint32_t hash) override;
bool EraseAndConfirm(const Slice& key, uint32_t hash,
CleanupContext* context);
size_t GetUsage() const override;
size_t GetPinnedUsage() const override;
void EraseUnRefEntries() override;
void ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) override;
private:
static const uint32_t kInCacheBit = 1;
static const uint32_t kUsageBit = 2;
static const uint32_t kRefsOffset = 2;
static const uint32_t kOneRef = 1 << kRefsOffset;
// Helper functions to extract cache handle flags and counters.
static bool InCache(uint32_t flags) { return flags & kInCacheBit; }
static bool HasUsage(uint32_t flags) { return flags & kUsageBit; }
static uint32_t CountRefs(uint32_t flags) { return flags >> kRefsOffset; }
// Decrease reference count of the entry. If this decreases the count to 0,
// recycle the entry. If set_usage is true, also set the usage bit.
//
// returns true if a value is erased.
//
// Not necessary to hold mutex_ before being called.
bool Unref(CacheHandle* handle, bool set_usage, CleanupContext* context);
// Unset in-cache bit of the entry. Recycle the handle if necessary.
//
// returns true if a value is erased.
//
// Has to hold mutex_ before being called.
bool UnsetInCache(CacheHandle* handle, CleanupContext* context);
// Put the handle back to recycle_ list, and put the value associated with
// it into to-be-deleted list. It doesn't cleanup the key as it might be
// reused by another handle.
//
// Has to hold mutex_ before being called.
void RecycleHandle(CacheHandle* handle, CleanupContext* context);
// Delete keys and values in to-be-deleted list. Call the method without
// holding mutex, as destructors can be expensive.
void Cleanup(const CleanupContext& context);
// Examine the handle for eviction. If the handle is in cache, usage bit is
// not set, and referece count is 0, evict it from cache. Otherwise unset
// the usage bit.
//
// Has to hold mutex_ before being called.
bool TryEvict(CacheHandle* value, CleanupContext* context);
// Scan through the circular list, evict entries until we get enough capacity
// for new cache entry of specific size. Return true if success, false
// otherwise.
//
// Has to hold mutex_ before being called.
bool EvictFromCache(size_t charge, CleanupContext* context);
CacheHandle* Insert(const Slice& key, uint32_t hash, void* value,
size_t change,
void (*deleter)(const Slice& key, void* value),
bool hold_reference, CleanupContext* context,
bool* overwritten);
// Guards list_, head_, and recycle_. In addition, updating table_ also has
// to hold the mutex, to avoid the cache being in inconsistent state.
mutable port::Mutex mutex_;
// The circular list of cache handles. Initially the list is empty. Once a
// handle is needed by insertion, and no more handles are available in
// recycle bin, one more handle is appended to the end.
//
// We use std::deque for the circular list because we want to make sure
// pointers to handles are valid through out the life-cycle of the cache
// (in contrast to std::vector), and be able to grow the list (in contrast
// to statically allocated arrays).
std::deque<CacheHandle> list_;
// Pointer to the next handle in the circular list to be examine for
// eviction.
size_t head_;
// Recycle bin of cache handles.
autovector<CacheHandle*> recycle_;
// Maximum cache size.
std::atomic<size_t> capacity_;
// Current total size of the cache.
std::atomic<size_t> usage_;
// Total un-released cache size.
std::atomic<size_t> pinned_usage_;
// Whether allow insert into cache if cache is full.
std::atomic<bool> strict_capacity_limit_;
// Hash table (tbb::concurrent_hash_map) for lookup.
HashTable table_;
};
ClockCacheShard::ClockCacheShard()
: head_(0), usage_(0), pinned_usage_(0), strict_capacity_limit_(false) {}
ClockCacheShard::~ClockCacheShard() {
for (auto& handle : list_) {
uint32_t flags = handle.flags.load(std::memory_order_relaxed);
if (InCache(flags) || CountRefs(flags) > 0) {
if (handle.deleter != nullptr) {
(*handle.deleter)(handle.key, handle.value);
}
delete[] handle.key.data();
}
}
}
size_t ClockCacheShard::GetUsage() const {
return usage_.load(std::memory_order_relaxed);
}
size_t ClockCacheShard::GetPinnedUsage() const {
return pinned_usage_.load(std::memory_order_relaxed);
}
void ClockCacheShard::ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) {
assert(average_entries_per_lock > 0);
MutexLock lock(&mutex_);
// Figure out the range to iterate, update `state`
size_t list_size = list_.size();
size_t start_idx = *state;
size_t end_idx = start_idx + average_entries_per_lock;
if (start_idx > list_size) {
// Shouldn't reach here, but recoverable
assert(false);
// Mark finished with all
*state = UINT32_MAX;
return;
}
if (end_idx >= list_size || end_idx >= UINT32_MAX) {
// This also includes the hypothetical case of >4 billion
// cache handles.
end_idx = list_size;
// Mark finished with all
*state = UINT32_MAX;
} else {
*state = static_cast<uint32_t>(end_idx);
}
// Do the iteration
auto cur = list_.begin() + start_idx;
auto end = list_.begin() + end_idx;
for (; cur != end; ++cur) {
const CacheHandle& handle = *cur;
// Use relaxed semantics instead of acquire semantics since we are
// holding mutex
uint32_t flags = handle.flags.load(std::memory_order_relaxed);
if (InCache(flags)) {
callback(handle.key, handle.value, handle.charge, handle.deleter);
}
}
}
void ClockCacheShard::RecycleHandle(CacheHandle* handle,
CleanupContext* context) {
mutex_.AssertHeld();
assert(!InCache(handle->flags) && CountRefs(handle->flags) == 0);
context->to_delete_key.push_back(handle->key.data());
context->to_delete_value.emplace_back(*handle);
size_t total_charge = handle->GetTotalCharge();
// clearing `handle` fields would go here but not strictly required
recycle_.push_back(handle);
usage_.fetch_sub(total_charge, std::memory_order_relaxed);
}
void ClockCacheShard::Cleanup(const CleanupContext& context) {
for (const CacheHandle& handle : context.to_delete_value) {
if (handle.deleter) {
(*handle.deleter)(handle.key, handle.value);
}
}
for (const char* key : context.to_delete_key) {
delete[] key;
}
}
bool ClockCacheShard::Ref(Cache::Handle* h) {
auto handle = reinterpret_cast<CacheHandle*>(h);
// CAS loop to increase reference count.
uint32_t flags = handle->flags.load(std::memory_order_relaxed);
while (InCache(flags)) {
// Use acquire semantics on success, as further operations on the cache
// entry has to be order after reference count is increased.
if (handle->flags.compare_exchange_weak(flags, flags + kOneRef,
std::memory_order_acquire,
std::memory_order_relaxed)) {
if (CountRefs(flags) == 0) {
// No reference count before the operation.
size_t total_charge = handle->GetTotalCharge();
pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
}
return true;
}
}
return false;
}
bool ClockCacheShard::Unref(CacheHandle* handle, bool set_usage,
CleanupContext* context) {
if (set_usage) {
handle->flags.fetch_or(kUsageBit, std::memory_order_relaxed);
}
// If the handle reaches state refs=0 and InCache=true after this
// atomic operation then we cannot access `handle` afterward, because
// it could be evicted before we access the `handle`.
size_t total_charge = handle->GetTotalCharge();
// Use acquire-release semantics as previous operations on the cache entry
// has to be order before reference count is decreased, and potential cleanup
// of the entry has to be order after.
uint32_t flags = handle->flags.fetch_sub(kOneRef, std::memory_order_acq_rel);
assert(CountRefs(flags) > 0);
if (CountRefs(flags) == 1) {
// this is the last reference.
pinned_usage_.fetch_sub(total_charge, std::memory_order_relaxed);
// Cleanup if it is the last reference.
if (!InCache(flags)) {
MutexLock l(&mutex_);
RecycleHandle(handle, context);
}
}
return context->to_delete_value.size();
}
bool ClockCacheShard::UnsetInCache(CacheHandle* handle,
CleanupContext* context) {
mutex_.AssertHeld();
// Use acquire-release semantics as previous operations on the cache entry
// has to be order before reference count is decreased, and potential cleanup
// of the entry has to be order after.
uint32_t flags =
handle->flags.fetch_and(~kInCacheBit, std::memory_order_acq_rel);
// Cleanup if it is the last reference.
if (InCache(flags) && CountRefs(flags) == 0) {
RecycleHandle(handle, context);
}
return context->to_delete_value.size();
}
bool ClockCacheShard::TryEvict(CacheHandle* handle, CleanupContext* context) {
mutex_.AssertHeld();
uint32_t flags = kInCacheBit;
if (handle->flags.compare_exchange_strong(flags, 0, std::memory_order_acquire,
std::memory_order_relaxed)) {
bool erased __attribute__((__unused__)) =
table_.erase(ClockCacheKey(handle->key, handle->hash));
assert(erased);
RecycleHandle(handle, context);
return true;
}
handle->flags.fetch_and(~kUsageBit, std::memory_order_relaxed);
return false;
}
bool ClockCacheShard::EvictFromCache(size_t charge, CleanupContext* context) {
size_t usage = usage_.load(std::memory_order_relaxed);
size_t capacity = capacity_.load(std::memory_order_relaxed);
if (usage == 0) {
return charge <= capacity;
}
size_t new_head = head_;
bool second_iteration = false;
while (usage + charge > capacity) {
assert(new_head < list_.size());
if (TryEvict(&list_[new_head], context)) {
usage = usage_.load(std::memory_order_relaxed);
}
new_head = (new_head + 1 >= list_.size()) ? 0 : new_head + 1;
if (new_head == head_) {
if (second_iteration) {
return false;
} else {
second_iteration = true;
}
}
}
head_ = new_head;
return true;
}
void ClockCacheShard::SetCapacity(size_t capacity) {
CleanupContext context;
{
MutexLock l(&mutex_);
capacity_.store(capacity, std::memory_order_relaxed);
EvictFromCache(0, &context);
}
Cleanup(context);
}
void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
strict_capacity_limit_.store(strict_capacity_limit,
std::memory_order_relaxed);
}
CacheHandle* ClockCacheShard::Insert(
const Slice& key, uint32_t hash, void* value, size_t charge,
void (*deleter)(const Slice& key, void* value), bool hold_reference,
CleanupContext* context, bool* overwritten) {
assert(overwritten != nullptr && *overwritten == false);
uint32_t meta_charge =
CacheHandle::CalcMetadataCharge(key, metadata_charge_policy_);
size_t total_charge = charge + meta_charge;
MutexLock l(&mutex_);
bool success = EvictFromCache(total_charge, context);
bool strict = strict_capacity_limit_.load(std::memory_order_relaxed);
if (!success && (strict || !hold_reference)) {
context->to_delete_key.push_back(key.data());
if (!hold_reference) {
context->to_delete_value.emplace_back(key, value, deleter);
}
return nullptr;
}
// Grab available handle from recycle bin. If recycle bin is empty, create
// and append new handle to end of circular list.
CacheHandle* handle = nullptr;
if (!recycle_.empty()) {
handle = recycle_.back();
recycle_.pop_back();
} else {
list_.emplace_back();
handle = &list_.back();
}
// Fill handle.
handle->key = key;
handle->hash = hash;
handle->value = value;
handle->charge = charge;
handle->meta_charge = meta_charge;
handle->deleter = deleter;
uint32_t flags = hold_reference ? kInCacheBit + kOneRef : kInCacheBit;
// TODO investigate+fix suspected race condition:
// [thread 1] Lookup starts, up to Ref()
// [thread 2] Erase/evict the entry just looked up
// [thread 1] Ref() the handle, even though it's in the recycle bin
// [thread 2] Insert with recycling that handle
// Here we obliterate the other thread's Ref
// Possible fix: never blindly overwrite the flags, but only make
// relative updates (fetch_add, etc).
handle->flags.store(flags, std::memory_order_relaxed);
HashTable::accessor accessor;
if (table_.find(accessor, ClockCacheKey(key, hash))) {
*overwritten = true;
CacheHandle* existing_handle = accessor->second;
table_.erase(accessor);
UnsetInCache(existing_handle, context);
}
table_.insert(HashTable::value_type(ClockCacheKey(key, hash), handle));
if (hold_reference) {
pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
}
usage_.fetch_add(total_charge, std::memory_order_relaxed);
return handle;
}
Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
size_t charge,
void (*deleter)(const Slice& key, void* value),
Cache::Handle** out_handle,
Cache::Priority /*priority*/) {
CleanupContext context;
HashTable::accessor accessor;
char* key_data = new char[key.size()];
memcpy(key_data, key.data(), key.size());
Slice key_copy(key_data, key.size());
bool overwritten = false;
CacheHandle* handle = Insert(key_copy, hash, value, charge, deleter,
out_handle != nullptr, &context, &overwritten);
Status s;
if (out_handle != nullptr) {
if (handle == nullptr) {
s = Status::Incomplete("Insert failed due to CLOCK cache being full.");
} else {
*out_handle = reinterpret_cast<Cache::Handle*>(handle);
}
}
if (overwritten) {
assert(s.ok());
s = Status::OkOverwritten();
}
Cleanup(context);
return s;
}
Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
HashTable::const_accessor accessor;
if (!table_.find(accessor, ClockCacheKey(key, hash))) {
return nullptr;
}
CacheHandle* handle = accessor->second;
accessor.release();
// Ref() could fail if another thread sneak in and evict/erase the cache
// entry before we are able to hold reference.
if (!Ref(reinterpret_cast<Cache::Handle*>(handle))) {
return nullptr;
}
// Double check the key since the handle may now representing another key
// if other threads sneak in, evict/erase the entry and re-used the handle
// for another cache entry.
if (hash != handle->hash || key != handle->key) {
CleanupContext context;
Unref(handle, false, &context);
// It is possible Unref() delete the entry, so we need to cleanup.
Cleanup(context);
return nullptr;
}
return reinterpret_cast<Cache::Handle*>(handle);
}
bool ClockCacheShard::Release(Cache::Handle* h, bool erase_if_last_ref) {
CleanupContext context;
CacheHandle* handle = reinterpret_cast<CacheHandle*>(h);
bool erased = Unref(handle, true, &context);
if (erase_if_last_ref && !erased) {
erased = EraseAndConfirm(handle->key, handle->hash, &context);
}
Cleanup(context);
return erased;
}
void ClockCacheShard::Erase(const Slice& key, uint32_t hash) {
CleanupContext context;
EraseAndConfirm(key, hash, &context);
Cleanup(context);
}
bool ClockCacheShard::EraseAndConfirm(const Slice& key, uint32_t hash,
CleanupContext* context) {
MutexLock l(&mutex_);
HashTable::accessor accessor;
bool erased = false;
if (table_.find(accessor, ClockCacheKey(key, hash))) {
CacheHandle* handle = accessor->second;
table_.erase(accessor);
erased = UnsetInCache(handle, context);
}
return erased;
}
void ClockCacheShard::EraseUnRefEntries() {
CleanupContext context;
{
MutexLock l(&mutex_);
table_.clear();
for (auto& handle : list_) {
UnsetInCache(&handle, &context);
}
}
Cleanup(context);
}
class ClockCache final : public ShardedCache {
public:
ClockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
int num_shards = 1 << num_shard_bits;
shards_ = new ClockCacheShard[num_shards];
for (int i = 0; i < num_shards; i++) {
shards_[i].set_metadata_charge_policy(metadata_charge_policy);
}
SetCapacity(capacity);
SetStrictCapacityLimit(strict_capacity_limit);
}
~ClockCache() override { delete[] shards_; }
const char* Name() const override { return "ClockCache"; }
CacheShard* GetShard(uint32_t shard) override {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
const CacheShard* GetShard(uint32_t shard) const override {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
void* Value(Handle* handle) override {
return reinterpret_cast<const CacheHandle*>(handle)->value;
}
size_t GetCharge(Handle* handle) const override {
return reinterpret_cast<const CacheHandle*>(handle)->charge;
}
uint32_t GetHash(Handle* handle) const override {
return reinterpret_cast<const CacheHandle*>(handle)->hash;
}
DeleterFn GetDeleter(Handle* handle) const override {
return reinterpret_cast<const CacheHandle*>(handle)->deleter;
}
void DisownData() override {
// Leak data only if that won't generate an ASAN/valgrind warning
if (!kMustFreeHeapAllocations) {
shards_ = nullptr;
}
}
void WaitAll(std::vector<Handle*>& /*handles*/) override {}
private:
ClockCacheShard* shards_;
};
} // end anonymous namespace
std::shared_ptr<Cache> NewClockCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
if (num_shard_bits < 0) {
num_shard_bits = GetDefaultCacheShardBits(capacity);
}
return std::make_shared<ClockCache>(
capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy);
}
} // namespace ROCKSDB_NAMESPACE
#endif // SUPPORT_CLOCK_CACHE

16
cache/clock_cache.h vendored
View File

@ -1,16 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include "rocksdb/cache.h"
#if defined(TBB) && !defined(ROCKSDB_LITE)
#define SUPPORT_CLOCK_CACHE
#endif

View File

@ -1,171 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/compressed_secondary_cache.h"
#include <memory>
#include "memory/memory_allocator.h"
#include "util/compression.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
namespace {
void DeletionCallback(const Slice& /*key*/, void* obj) {
delete reinterpret_cast<CacheAllocationPtr*>(obj);
obj = nullptr;
}
} // namespace
CompressedSecondaryCache::CompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
CompressionType compression_type, uint32_t compress_format_version)
: cache_options_(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator, use_adaptive_mutex,
metadata_charge_policy, compression_type,
compress_format_version) {
cache_ = NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator,
use_adaptive_mutex, metadata_charge_policy);
}
CompressedSecondaryCache::~CompressedSecondaryCache() { cache_.reset(); }
std::unique_ptr<SecondaryCacheResultHandle> CompressedSecondaryCache::Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
bool& is_in_sec_cache) {
std::unique_ptr<SecondaryCacheResultHandle> handle;
is_in_sec_cache = false;
Cache::Handle* lru_handle = cache_->Lookup(key);
if (lru_handle == nullptr) {
return handle;
}
CacheAllocationPtr* ptr =
reinterpret_cast<CacheAllocationPtr*>(cache_->Value(lru_handle));
void* value = nullptr;
size_t charge = 0;
Status s;
if (cache_options_.compression_type == kNoCompression) {
s = create_cb(ptr->get(), cache_->GetCharge(lru_handle), &value, &charge);
} else {
UncompressionContext uncompression_context(cache_options_.compression_type);
UncompressionInfo uncompression_info(uncompression_context,
UncompressionDict::GetEmptyDict(),
cache_options_.compression_type);
size_t uncompressed_size = 0;
CacheAllocationPtr uncompressed;
uncompressed = UncompressData(
uncompression_info, (char*)ptr->get(), cache_->GetCharge(lru_handle),
&uncompressed_size, cache_options_.compress_format_version,
cache_options_.memory_allocator.get());
if (!uncompressed) {
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
return handle;
}
s = create_cb(uncompressed.get(), uncompressed_size, &value, &charge);
}
if (!s.ok()) {
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
return handle;
}
cache_->Release(lru_handle, /* erase_if_last_ref */ true);
handle.reset(new CompressedSecondaryCacheResultHandle(value, charge));
return handle;
}
Status CompressedSecondaryCache::Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) {
size_t size = (*helper->size_cb)(value);
CacheAllocationPtr ptr =
AllocateBlock(size, cache_options_.memory_allocator.get());
Status s = (*helper->saveto_cb)(value, 0, size, ptr.get());
if (!s.ok()) {
return s;
}
Slice val(ptr.get(), size);
std::string compressed_val;
if (cache_options_.compression_type != kNoCompression) {
CompressionOptions compression_opts;
CompressionContext compression_context(cache_options_.compression_type);
uint64_t sample_for_compression = 0;
CompressionInfo compression_info(
compression_opts, compression_context, CompressionDict::GetEmptyDict(),
cache_options_.compression_type, sample_for_compression);
bool success =
CompressData(val, compression_info,
cache_options_.compress_format_version, &compressed_val);
if (!success) {
return Status::Corruption("Error compressing value.");
}
val = Slice(compressed_val);
size = compressed_val.size();
ptr = AllocateBlock(size, cache_options_.memory_allocator.get());
memcpy(ptr.get(), compressed_val.data(), size);
}
CacheAllocationPtr* buf = new CacheAllocationPtr(std::move(ptr));
return cache_->Insert(key, buf, size, DeletionCallback);
}
void CompressedSecondaryCache::Erase(const Slice& key) { cache_->Erase(key); }
std::string CompressedSecondaryCache::GetPrintableOptions() const {
std::string ret;
ret.reserve(20000);
const int kBufferSize = 200;
char buffer[kBufferSize];
ret.append(cache_->GetPrintableOptions());
snprintf(buffer, kBufferSize, " compression_type : %s\n",
CompressionTypeToString(cache_options_.compression_type).c_str());
ret.append(buffer);
snprintf(buffer, kBufferSize, " compression_type : %d\n",
cache_options_.compress_format_version);
ret.append(buffer);
return ret;
}
std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
CompressionType compression_type, uint32_t compress_format_version) {
return std::make_shared<CompressedSecondaryCache>(
capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
memory_allocator, use_adaptive_mutex, metadata_charge_policy,
compression_type, compress_format_version);
}
std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
const CompressedSecondaryCacheOptions& opts) {
// The secondary_cache is disabled for this LRUCache instance.
assert(opts.secondary_cache == nullptr);
return NewCompressedSecondaryCache(
opts.capacity, opts.num_shard_bits, opts.strict_capacity_limit,
opts.high_pri_pool_ratio, opts.memory_allocator, opts.use_adaptive_mutex,
opts.metadata_charge_policy, opts.compression_type,
opts.compress_format_version);
}
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,86 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <memory>
#include "cache/lru_cache.h"
#include "memory/memory_allocator.h"
#include "rocksdb/secondary_cache.h"
#include "rocksdb/slice.h"
#include "rocksdb/status.h"
#include "util/compression.h"
namespace ROCKSDB_NAMESPACE {
class CompressedSecondaryCacheResultHandle : public SecondaryCacheResultHandle {
public:
CompressedSecondaryCacheResultHandle(void* value, size_t size)
: value_(value), size_(size) {}
virtual ~CompressedSecondaryCacheResultHandle() override = default;
CompressedSecondaryCacheResultHandle(
const CompressedSecondaryCacheResultHandle&) = delete;
CompressedSecondaryCacheResultHandle& operator=(
const CompressedSecondaryCacheResultHandle&) = delete;
bool IsReady() override { return true; }
void Wait() override {}
void* Value() override { return value_; }
size_t Size() override { return size_; }
private:
void* value_;
size_t size_;
};
// The CompressedSecondaryCache is a concrete implementation of
// rocksdb::SecondaryCache.
//
// Users can also cast a pointer to it and call methods on
// it directly, especially custom methods that may be added
// in the future. For example -
// std::unique_ptr<rocksdb::SecondaryCache> cache =
// NewCompressedSecondaryCache(opts);
// static_cast<CompressedSecondaryCache*>(cache.get())->Erase(key);
class CompressedSecondaryCache : public SecondaryCache {
public:
CompressedSecondaryCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata,
CompressionType compression_type = CompressionType::kLZ4Compression,
uint32_t compress_format_version = 2);
virtual ~CompressedSecondaryCache() override;
const char* Name() const override { return "CompressedSecondaryCache"; }
Status Insert(const Slice& key, void* value,
const Cache::CacheItemHelper* helper) override;
std::unique_ptr<SecondaryCacheResultHandle> Lookup(
const Slice& key, const Cache::CreateCallback& create_cb, bool /*wait*/,
bool& is_in_sec_cache) override;
void Erase(const Slice& key) override;
void WaitAll(std::vector<SecondaryCacheResultHandle*> /*handles*/) override {}
std::string GetPrintableOptions() const override;
private:
std::shared_ptr<Cache> cache_;
CompressedSecondaryCacheOptions cache_options_;
};
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,607 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "cache/compressed_secondary_cache.h"
#include <algorithm>
#include <cstdint>
#include "memory/jemalloc_nodump_allocator.h"
#include "memory/memory_allocator.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/compression.h"
#include "util/random.h"
namespace ROCKSDB_NAMESPACE {
class CompressedSecondaryCacheTest : public testing::Test {
public:
CompressedSecondaryCacheTest() : fail_create_(false) {}
~CompressedSecondaryCacheTest() {}
protected:
class TestItem {
public:
TestItem(const char* buf, size_t size) : buf_(new char[size]), size_(size) {
memcpy(buf_.get(), buf, size);
}
~TestItem() {}
char* Buf() { return buf_.get(); }
size_t Size() { return size_; }
private:
std::unique_ptr<char[]> buf_;
size_t size_;
};
static size_t SizeCallback(void* obj) {
return reinterpret_cast<TestItem*>(obj)->Size();
}
static Status SaveToCallback(void* from_obj, size_t from_offset,
size_t length, void* out) {
TestItem* item = reinterpret_cast<TestItem*>(from_obj);
const char* buf = item->Buf();
EXPECT_EQ(length, item->Size());
EXPECT_EQ(from_offset, 0);
memcpy(out, buf, length);
return Status::OK();
}
static void DeletionCallback(const Slice& /*key*/, void* obj) {
delete reinterpret_cast<TestItem*>(obj);
obj = nullptr;
}
static Cache::CacheItemHelper helper_;
static Status SaveToCallbackFail(void* /*obj*/, size_t /*offset*/,
size_t /*size*/, void* /*out*/) {
return Status::NotSupported();
}
static Cache::CacheItemHelper helper_fail_;
Cache::CreateCallback test_item_creator = [&](const void* buf, size_t size,
void** out_obj,
size_t* charge) -> Status {
if (fail_create_) {
return Status::NotSupported();
}
*out_obj = reinterpret_cast<void*>(new TestItem((char*)buf, size));
*charge = size;
return Status::OK();
};
void SetFailCreate(bool fail) { fail_create_ = fail; }
void BasicTest(bool sec_cache_is_compressed, bool use_jemalloc) {
CompressedSecondaryCacheOptions opts;
opts.capacity = 2048;
opts.num_shard_bits = 0;
opts.metadata_charge_policy = kDontChargeCacheMetadata;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
opts.compression_type = CompressionType::kNoCompression;
}
} else {
opts.compression_type = CompressionType::kNoCompression;
}
if (use_jemalloc) {
JemallocAllocatorOptions jopts;
std::shared_ptr<MemoryAllocator> allocator;
std::string msg;
if (JemallocNodumpAllocator::IsSupported(&msg)) {
Status s = NewJemallocNodumpAllocator(jopts, &allocator);
if (s.ok()) {
opts.memory_allocator = allocator;
}
} else {
ROCKSDB_GTEST_BYPASS("JEMALLOC not supported");
}
}
std::shared_ptr<SecondaryCache> sec_cache =
NewCompressedSecondaryCache(opts);
bool is_in_sec_cache{true};
// Lookup an non-existent key.
std::unique_ptr<SecondaryCacheResultHandle> handle0 =
sec_cache->Lookup("k0", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle0, nullptr);
Random rnd(301);
// Insert and Lookup the first item.
std::string str1;
test::CompressibleString(&rnd, 0.25, 1000, &str1);
TestItem item1(str1.data(), str1.length());
ASSERT_OK(sec_cache->Insert("k1", &item1,
&CompressedSecondaryCacheTest::helper_));
std::unique_ptr<SecondaryCacheResultHandle> handle1 =
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle1, nullptr);
ASSERT_FALSE(is_in_sec_cache);
std::unique_ptr<TestItem> val1 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle1->Value()));
ASSERT_NE(val1, nullptr);
ASSERT_EQ(memcmp(val1->Buf(), item1.Buf(), item1.Size()), 0);
// Lookup the first item again.
std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle1_1, nullptr);
// Insert and Lookup the second item.
std::string str2;
test::CompressibleString(&rnd, 0.5, 1000, &str2);
TestItem item2(str2.data(), str2.length());
ASSERT_OK(sec_cache->Insert("k2", &item2,
&CompressedSecondaryCacheTest::helper_));
std::unique_ptr<SecondaryCacheResultHandle> handle2 =
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle2, nullptr);
std::unique_ptr<TestItem> val2 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle2->Value()));
ASSERT_NE(val2, nullptr);
ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0);
std::vector<SecondaryCacheResultHandle*> handles = {handle1.get(),
handle2.get()};
sec_cache->WaitAll(handles);
sec_cache.reset();
}
void FailsTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 1100;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> sec_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
// Insert and Lookup the first item.
Random rnd(301);
std::string str1(rnd.RandomString(1000));
TestItem item1(str1.data(), str1.length());
ASSERT_OK(sec_cache->Insert("k1", &item1,
&CompressedSecondaryCacheTest::helper_));
// Insert and Lookup the second item.
std::string str2(rnd.RandomString(200));
TestItem item2(str2.data(), str2.length());
// k1 is evicted.
ASSERT_OK(sec_cache->Insert("k2", &item2,
&CompressedSecondaryCacheTest::helper_));
bool is_in_sec_cache{false};
std::unique_ptr<SecondaryCacheResultHandle> handle1_1 =
sec_cache->Lookup("k1", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle1_1, nullptr);
std::unique_ptr<SecondaryCacheResultHandle> handle2 =
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_NE(handle2, nullptr);
std::unique_ptr<TestItem> val2 =
std::unique_ptr<TestItem>(static_cast<TestItem*>(handle2->Value()));
ASSERT_NE(val2, nullptr);
ASSERT_EQ(memcmp(val2->Buf(), item2.Buf(), item2.Size()), 0);
// Create Fails.
SetFailCreate(true);
std::unique_ptr<SecondaryCacheResultHandle> handle2_1 =
sec_cache->Lookup("k2", test_item_creator, true, is_in_sec_cache);
ASSERT_EQ(handle2_1, nullptr);
// Save Fails.
std::string str3 = rnd.RandomString(10);
TestItem item3(str3.data(), str3.length());
ASSERT_NOK(sec_cache->Insert("k3", &item3,
&CompressedSecondaryCacheTest::helper_fail_));
sec_cache.reset();
}
void BasicIntegrationTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2300;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions lru_cache_opts(1024, 0, false, 0.5, nullptr,
kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
lru_cache_opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(lru_cache_opts);
std::shared_ptr<Statistics> stats = CreateDBStatistics();
Random rnd(301);
std::string str1 = rnd.RandomString(1010);
std::string str1_clone{str1};
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// After Insert, lru cache contains k2 and secondary cache contains k1.
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
std::string str3 = rnd.RandomString(1020);
TestItem* item3 = new TestItem(str3.data(), str3.length());
// After Insert, lru cache contains k3 and secondary cache contains k1 and
// k2
ASSERT_OK(cache->Insert("k3", item3, &CompressedSecondaryCacheTest::helper_,
str3.length()));
Cache::Handle* handle;
handle = cache->Lookup("k3", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
TestItem* val3 = static_cast<TestItem*>(cache->Value(handle));
ASSERT_NE(val3, nullptr);
ASSERT_EQ(memcmp(val3->Buf(), item3->Buf(), item3->Size()), 0);
cache->Release(handle);
// Lookup an non-existent key.
handle = cache->Lookup("k0", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_EQ(handle, nullptr);
// This Lookup should promote k1 and erase k1 from the secondary cache,
// then k3 is demoted. So k2 and k3 are in the secondary cache.
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
TestItem* val1_1 = static_cast<TestItem*>(cache->Value(handle));
ASSERT_NE(val1_1, nullptr);
ASSERT_EQ(memcmp(val1_1->Buf(), str1_clone.data(), str1_clone.size()), 0);
cache->Release(handle);
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true,
stats.get());
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void BasicIntegrationFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
auto item1 =
std::unique_ptr<TestItem>(new TestItem(str1.data(), str1.length()));
ASSERT_NOK(cache->Insert("k1", item1.get(), nullptr, str1.length()));
ASSERT_OK(cache->Insert("k1", item1.get(),
&CompressedSecondaryCacheTest::helper_,
str1.length()));
item1.release(); // Appease clang-analyze "potential memory leak"
Cache::Handle* handle;
handle = cache->Lookup("k2", nullptr, test_item_creator,
Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, false);
ASSERT_EQ(handle, nullptr);
cache.reset();
secondary_cache.reset();
}
void IntegrationSaveFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1,
&CompressedSecondaryCacheTest::helper_fail_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2,
&CompressedSecondaryCacheTest::helper_fail_,
str2.length()));
Cache::Handle* handle;
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 demotion would have failed
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_fail_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void IntegrationCreateFailTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, false, 0.5, nullptr, kDefaultToAdaptiveMutex,
kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
Cache::Handle* handle;
SetFailCreate(true);
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
// This lookup should fail, since k1 creation would have failed
handle = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_EQ(handle, nullptr);
// Since k1 didn't get promoted, k2 should still be in cache
handle = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle, nullptr);
cache->Release(handle);
cache.reset();
secondary_cache.reset();
}
void IntegrationFullCapacityTest(bool sec_cache_is_compressed) {
CompressedSecondaryCacheOptions secondary_cache_opts;
if (sec_cache_is_compressed) {
if (!LZ4_Supported()) {
ROCKSDB_GTEST_SKIP("This test requires LZ4 support.");
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
} else {
secondary_cache_opts.compression_type = CompressionType::kNoCompression;
}
secondary_cache_opts.capacity = 2048;
secondary_cache_opts.num_shard_bits = 0;
secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SecondaryCache> secondary_cache =
NewCompressedSecondaryCache(secondary_cache_opts);
LRUCacheOptions opts(1024, 0, /*_strict_capacity_limit=*/true, 0.5, nullptr,
kDefaultToAdaptiveMutex, kDontChargeCacheMetadata);
opts.secondary_cache = secondary_cache;
std::shared_ptr<Cache> cache = NewLRUCache(opts);
Random rnd(301);
std::string str1 = rnd.RandomString(1020);
TestItem* item1 = new TestItem(str1.data(), str1.length());
ASSERT_OK(cache->Insert("k1", item1, &CompressedSecondaryCacheTest::helper_,
str1.length()));
std::string str2 = rnd.RandomString(1020);
TestItem* item2 = new TestItem(str2.data(), str2.length());
// k1 should be demoted to the secondary cache.
ASSERT_OK(cache->Insert("k2", item2, &CompressedSecondaryCacheTest::helper_,
str2.length()));
Cache::Handle* handle2;
handle2 = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle2, nullptr);
cache->Release(handle2);
// k1 promotion should fail due to the block cache being at capacity,
// but the lookup should still succeed
Cache::Handle* handle1;
handle1 = cache->Lookup("k1", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle1, nullptr);
cache->Release(handle1);
// Since k1 didn't get inserted, k2 should still be in cache
handle2 = cache->Lookup("k2", &CompressedSecondaryCacheTest::helper_,
test_item_creator, Cache::Priority::LOW, true);
ASSERT_NE(handle2, nullptr);
cache->Release(handle2);
cache.reset();
secondary_cache.reset();
}
private:
bool fail_create_;
};
Cache::CacheItemHelper CompressedSecondaryCacheTest::helper_(
CompressedSecondaryCacheTest::SizeCallback,
CompressedSecondaryCacheTest::SaveToCallback,
CompressedSecondaryCacheTest::DeletionCallback);
Cache::CacheItemHelper CompressedSecondaryCacheTest::helper_fail_(
CompressedSecondaryCacheTest::SizeCallback,
CompressedSecondaryCacheTest::SaveToCallbackFail,
CompressedSecondaryCacheTest::DeletionCallback);
TEST_F(CompressedSecondaryCacheTest, BasicTestWithNoCompression) {
BasicTest(false, false);
}
TEST_F(CompressedSecondaryCacheTest,
BasicTestWithMemoryAllocatorAndNoCompression) {
BasicTest(false, true);
}
TEST_F(CompressedSecondaryCacheTest, BasicTestWithCompression) {
BasicTest(true, false);
}
TEST_F(CompressedSecondaryCacheTest,
BasicTestWithMemoryAllocatorAndCompression) {
BasicTest(true, true);
}
TEST_F(CompressedSecondaryCacheTest, FailsTestWithNoCompression) {
FailsTest(false);
}
TEST_F(CompressedSecondaryCacheTest, FailsTestWithCompression) {
FailsTest(true);
}
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationTestWithNoCompression) {
BasicIntegrationTest(false);
}
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationTestWithCompression) {
BasicIntegrationTest(true);
}
TEST_F(CompressedSecondaryCacheTest,
BasicIntegrationFailTestWithNoCompression) {
BasicIntegrationFailTest(false);
}
TEST_F(CompressedSecondaryCacheTest, BasicIntegrationFailTestWithCompression) {
BasicIntegrationFailTest(true);
}
TEST_F(CompressedSecondaryCacheTest, IntegrationSaveFailTestWithNoCompression) {
IntegrationSaveFailTest(false);
}
TEST_F(CompressedSecondaryCacheTest, IntegrationSaveFailTestWithCompression) {
IntegrationSaveFailTest(true);
}
TEST_F(CompressedSecondaryCacheTest,
IntegrationCreateFailTestWithNoCompression) {
IntegrationCreateFailTest(false);
}
TEST_F(CompressedSecondaryCacheTest, IntegrationCreateFailTestWithCompression) {
IntegrationCreateFailTest(true);
}
TEST_F(CompressedSecondaryCacheTest,
IntegrationFullCapacityTestWithNoCompression) {
IntegrationFullCapacityTest(false);
}
TEST_F(CompressedSecondaryCacheTest,
IntegrationFullCapacityTestWithCompression) {
IntegrationFullCapacityTest(true);
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@ -1,511 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/fast_lru_cache.h"
#include <cassert>
#include <cstdint>
#include <cstdio>
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "port/lang.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
namespace fast_lru_cache {
LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
: length_bits_(/* historical starting size*/ 4),
list_(new LRUHandle* [size_t{1} << length_bits_] {}),
elems_(0),
max_length_bits_(max_upper_hash_bits) {}
LRUHandleTable::~LRUHandleTable() {
ApplyToEntriesRange(
[](LRUHandle* h) {
if (!h->HasRefs()) {
h->Free();
}
},
0, uint32_t{1} << length_bits_);
}
LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
return *FindPointer(key, hash);
}
LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
LRUHandle** ptr = FindPointer(h->key(), h->hash);
LRUHandle* old = *ptr;
h->next_hash = (old == nullptr ? nullptr : old->next_hash);
*ptr = h;
if (old == nullptr) {
++elems_;
if ((elems_ >> length_bits_) > 0) { // elems_ >= length
// Since each cache entry is fairly large, we aim for a small
// average linked list length (<= 1).
Resize();
}
}
return old;
}
LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
LRUHandle** ptr = FindPointer(key, hash);
LRUHandle* result = *ptr;
if (result != nullptr) {
*ptr = result->next_hash;
--elems_;
}
return result;
}
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
LRUHandle** ptr = &list_[hash >> (32 - length_bits_)];
while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
ptr = &(*ptr)->next_hash;
}
return ptr;
}
void LRUHandleTable::Resize() {
if (length_bits_ >= max_length_bits_) {
// Due to reaching limit of hash information, if we made the table bigger,
// we would allocate more addresses but only the same number would be used.
return;
}
if (length_bits_ >= 31) {
// Avoid undefined behavior shifting uint32_t by 32.
return;
}
uint32_t old_length = uint32_t{1} << length_bits_;
int new_length_bits = length_bits_ + 1;
std::unique_ptr<LRUHandle* []> new_list {
new LRUHandle* [size_t{1} << new_length_bits] {}
};
uint32_t count = 0;
for (uint32_t i = 0; i < old_length; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
LRUHandle* next = h->next_hash;
uint32_t hash = h->hash;
LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)];
h->next_hash = *ptr;
*ptr = h;
h = next;
count++;
}
}
assert(elems_ == count);
list_ = std::move(new_list);
length_bits_ = new_length_bits;
}
LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy,
int max_upper_hash_bits)
: capacity_(0),
strict_capacity_limit_(strict_capacity_limit),
table_(max_upper_hash_bits),
usage_(0),
lru_usage_(0) {
set_metadata_charge_policy(metadata_charge_policy);
// Make empty circular linked list.
lru_.next = &lru_;
lru_.prev = &lru_;
lru_low_pri_ = &lru_;
SetCapacity(capacity);
}
void LRUCacheShard::EraseUnRefEntries() {
autovector<LRUHandle*> last_reference_list;
{
MutexLock l(&mutex_);
while (lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
last_reference_list.push_back(old);
}
}
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
entry->Free();
}
}
void LRUCacheShard::ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) {
// The state is essentially going to be the starting hash, which works
// nicely even if we resize between calls because we use upper-most
// hash bits for table indexes.
MutexLock l(&mutex_);
uint32_t length_bits = table_.GetLengthBits();
uint32_t length = uint32_t{1} << length_bits;
assert(average_entries_per_lock > 0);
// Assuming we are called with same average_entries_per_lock repeatedly,
// this simplifies some logic (index_end will not overflow).
assert(average_entries_per_lock < length || *state == 0);
uint32_t index_begin = *state >> (32 - length_bits);
uint32_t index_end = index_begin + average_entries_per_lock;
if (index_end >= length) {
// Going to end
index_end = length;
*state = UINT32_MAX;
} else {
*state = index_end << (32 - length_bits);
}
table_.ApplyToEntriesRange(
[callback](LRUHandle* h) {
callback(h->key(), h->value, h->charge, h->deleter);
},
index_begin, index_end);
}
void LRUCacheShard::LRU_Remove(LRUHandle* e) {
assert(e->next != nullptr);
assert(e->prev != nullptr);
e->next->prev = e->prev;
e->prev->next = e->next;
e->prev = e->next = nullptr;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(lru_usage_ >= total_charge);
lru_usage_ -= total_charge;
}
void LRUCacheShard::LRU_Insert(LRUHandle* e) {
assert(e->next == nullptr);
assert(e->prev == nullptr);
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
// Inset "e" to head of LRU list.
e->next = &lru_;
e->prev = lru_.prev;
e->prev->next = e;
e->next->prev = e;
lru_usage_ += total_charge;
}
void LRUCacheShard::EvictFromLRU(size_t charge,
autovector<LRUHandle*>* deleted) {
while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
deleted->push_back(old);
}
}
void LRUCacheShard::SetCapacity(size_t capacity) {
autovector<LRUHandle*> last_reference_list;
{
MutexLock l(&mutex_);
capacity_ = capacity;
EvictFromLRU(0, &last_reference_list);
}
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
entry->Free();
}
}
void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
MutexLock l(&mutex_);
strict_capacity_limit_ = strict_capacity_limit;
}
Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
bool free_handle_on_fail) {
Status s = Status::OK();
autovector<LRUHandle*> last_reference_list;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
{
MutexLock l(&mutex_);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty.
EvictFromLRU(total_charge, &last_reference_list);
if ((usage_ + total_charge) > capacity_ &&
(strict_capacity_limit_ || handle == nullptr)) {
e->SetInCache(false);
if (handle == nullptr) {
// Don't insert the entry but still return ok, as if the entry inserted
// into cache and get evicted immediately.
last_reference_list.push_back(e);
} else {
if (free_handle_on_fail) {
delete[] reinterpret_cast<char*>(e);
*handle = nullptr;
}
s = Status::Incomplete("Insert failed due to LRU cache being full.");
}
} else {
// Insert into the cache. Note that the cache might get larger than its
// capacity if not enough space was freed up.
LRUHandle* old = table_.Insert(e);
usage_ += total_charge;
if (old != nullptr) {
s = Status::OkOverwritten();
assert(old->InCache());
old->SetInCache(false);
if (!old->HasRefs()) {
// old is on LRU because it's in cache and its reference count is 0.
LRU_Remove(old);
size_t old_total_charge =
old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
last_reference_list.push_back(old);
}
}
if (handle == nullptr) {
LRU_Insert(e);
} else {
// If caller already holds a ref, no need to take one here.
if (!e->HasRefs()) {
e->Ref();
}
*handle = reinterpret_cast<Cache::Handle*>(e);
}
}
}
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
entry->Free();
}
return s;
}
Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
LRUHandle* e = nullptr;
{
MutexLock l(&mutex_);
e = table_.Lookup(key, hash);
if (e != nullptr) {
assert(e->InCache());
if (!e->HasRefs()) {
// The entry is in LRU since it's in hash and has no external references
LRU_Remove(e);
}
e->Ref();
}
}
return reinterpret_cast<Cache::Handle*>(e);
}
bool LRUCacheShard::Ref(Cache::Handle* h) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
MutexLock l(&mutex_);
// To create another reference - entry must be already externally referenced.
assert(e->HasRefs());
e->Ref();
return true;
}
bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
if (handle == nullptr) {
return false;
}
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
bool last_reference = false;
{
MutexLock l(&mutex_);
last_reference = e->Unref();
if (last_reference && e->InCache()) {
// The item is still in cache, and nobody else holds a reference to it.
if (usage_ > capacity_ || erase_if_last_ref) {
// The LRU list must be empty since the cache is full.
assert(lru_.next == &lru_ || erase_if_last_ref);
// Take this opportunity and remove the item.
table_.Remove(e->key(), e->hash);
e->SetInCache(false);
} else {
// Put the item back on the LRU list, and don't free it.
LRU_Insert(e);
last_reference = false;
}
}
// If it was the last reference, then decrement the cache usage.
if (last_reference) {
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
}
}
// Free the entry here outside of mutex for performance reasons.
if (last_reference) {
e->Free();
}
return last_reference;
}
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
size_t charge, Cache::DeleterFn deleter,
Cache::Handle** handle,
Cache::Priority /*priority*/) {
// Allocate the memory here outside of the mutex.
// If the cache is full, we'll have to release it.
// It shouldn't happen very often though.
LRUHandle* e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
e->value = value;
e->flags = 0;
e->deleter = deleter;
e->charge = charge;
e->key_length = key.size();
e->hash = hash;
e->refs = 0;
e->next = e->prev = nullptr;
e->SetInCache(true);
memcpy(e->key_data, key.data(), key.size());
return InsertItem(e, handle, /* free_handle_on_fail */ true);
}
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
LRUHandle* e;
bool last_reference = false;
{
MutexLock l(&mutex_);
e = table_.Remove(key, hash);
if (e != nullptr) {
assert(e->InCache());
e->SetInCache(false);
if (!e->HasRefs()) {
// The entry is in LRU since it's in hash and has no external references
LRU_Remove(e);
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
last_reference = true;
}
}
}
// Free the entry here outside of mutex for performance reasons.
// last_reference will only be true if e != nullptr.
if (last_reference) {
e->Free();
}
}
size_t LRUCacheShard::GetUsage() const {
MutexLock l(&mutex_);
return usage_;
}
size_t LRUCacheShard::GetPinnedUsage() const {
MutexLock l(&mutex_);
assert(usage_ >= lru_usage_);
return usage_ - lru_usage_;
}
std::string LRUCacheShard::GetPrintableOptions() const { return std::string{}; }
LRUCache::LRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
num_shards_ = 1 << num_shard_bits;
shards_ = reinterpret_cast<LRUCacheShard*>(
port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
for (int i = 0; i < num_shards_; i++) {
new (&shards_[i])
LRUCacheShard(per_shard, strict_capacity_limit, metadata_charge_policy,
/* max_upper_hash_bits */ 32 - num_shard_bits);
}
}
LRUCache::~LRUCache() {
if (shards_ != nullptr) {
assert(num_shards_ > 0);
for (int i = 0; i < num_shards_; i++) {
shards_[i].~LRUCacheShard();
}
port::cacheline_aligned_free(shards_);
}
}
CacheShard* LRUCache::GetShard(uint32_t shard) {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
const CacheShard* LRUCache::GetShard(uint32_t shard) const {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
void* LRUCache::Value(Handle* handle) {
return reinterpret_cast<const LRUHandle*>(handle)->value;
}
size_t LRUCache::GetCharge(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->charge;
}
Cache::DeleterFn LRUCache::GetDeleter(Handle* handle) const {
auto h = reinterpret_cast<const LRUHandle*>(handle);
return h->deleter;
}
uint32_t LRUCache::GetHash(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->hash;
}
void LRUCache::DisownData() {
// Leak data only if that won't generate an ASAN/valgrind warning.
if (!kMustFreeHeapAllocations) {
shards_ = nullptr;
num_shards_ = 0;
}
}
} // namespace fast_lru_cache
std::shared_ptr<Cache> NewFastLRUCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
if (num_shard_bits >= 20) {
return nullptr; // The cache cannot be sharded into too many fine pieces.
}
if (num_shard_bits < 0) {
num_shard_bits = GetDefaultCacheShardBits(capacity);
}
return std::make_shared<fast_lru_cache::LRUCache>(
capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy);
}
} // namespace ROCKSDB_NAMESPACE

299
cache/fast_lru_cache.h vendored
View File

@ -1,299 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <memory>
#include <string>
#include "cache/sharded_cache.h"
#include "port/lang.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/secondary_cache.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
namespace fast_lru_cache {
// An experimental (under development!) alternative to LRUCache
struct LRUHandle {
void* value;
Cache::DeleterFn deleter;
LRUHandle* next_hash;
LRUHandle* next;
LRUHandle* prev;
size_t charge; // TODO(opt): Only allow uint32_t?
size_t key_length;
// The hash of key(). Used for fast sharding and comparisons.
uint32_t hash;
// The number of external refs to this entry. The cache itself is not counted.
uint32_t refs;
enum Flags : uint8_t {
// Whether this entry is referenced by the hash table.
IN_CACHE = (1 << 0),
};
uint8_t flags;
// Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
char key_data[1];
Slice key() const { return Slice(key_data, key_length); }
// Increase the reference count by 1.
void Ref() { refs++; }
// Just reduce the reference count by 1. Return true if it was last reference.
bool Unref() {
assert(refs > 0);
refs--;
return refs == 0;
}
// Return true if there are external refs, false otherwise.
bool HasRefs() const { return refs > 0; }
bool InCache() const { return flags & IN_CACHE; }
void SetInCache(bool in_cache) {
if (in_cache) {
flags |= IN_CACHE;
} else {
flags &= ~IN_CACHE;
}
}
void Free() {
assert(refs == 0);
if (deleter) {
(*deleter)(key(), value);
}
delete[] reinterpret_cast<char*>(this);
}
// Calculate the memory usage by metadata.
inline size_t CalcTotalCharge(
CacheMetadataChargePolicy metadata_charge_policy) {
size_t meta_charge = 0;
if (metadata_charge_policy == kFullChargeCacheMetadata) {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
meta_charge += malloc_usable_size(static_cast<void*>(this));
#else
// This is the size that is used when a new handle is created.
meta_charge += sizeof(LRUHandle) - 1 + key_length;
#endif
}
return charge + meta_charge;
}
};
// We provide our own simple hash table since it removes a whole bunch
// of porting hacks and is also faster than some of the built-in hash
// table implementations in some of the compiler/runtime combinations
// we have tested. E.g., readrandom speeds up by ~5% over the g++
// 4.4.3's builtin hashtable.
class LRUHandleTable {
public:
// If the table uses more hash bits than `max_upper_hash_bits`,
// it will eat into the bits used for sharding, which are constant
// for a given LRUHandleTable.
explicit LRUHandleTable(int max_upper_hash_bits);
~LRUHandleTable();
LRUHandle* Lookup(const Slice& key, uint32_t hash);
LRUHandle* Insert(LRUHandle* h);
LRUHandle* Remove(const Slice& key, uint32_t hash);
template <typename T>
void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) {
for (uint32_t i = index_begin; i < index_end; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
auto n = h->next_hash;
assert(h->InCache());
func(h);
h = n;
}
}
}
int GetLengthBits() const { return length_bits_; }
private:
// Return a pointer to slot that points to a cache entry that
// matches key/hash. If there is no such cache entry, return a
// pointer to the trailing slot in the corresponding linked list.
LRUHandle** FindPointer(const Slice& key, uint32_t hash);
void Resize();
// Number of hash bits (upper because lower bits used for sharding)
// used for table index. Length == 1 << length_bits_
int length_bits_;
// The table consists of an array of buckets where each bucket is
// a linked list of cache entries that hash into the bucket.
std::unique_ptr<LRUHandle*[]> list_;
// Number of elements currently in the table.
uint32_t elems_;
// Set from max_upper_hash_bits (see constructor).
const int max_length_bits_;
};
// A single shard of sharded cache.
class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
public:
LRUCacheShard(size_t capacity, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy,
int max_upper_hash_bits);
~LRUCacheShard() override = default;
// Separate from constructor so caller can easily make an array of LRUCache
// if current usage is more than new capacity, the function will attempt to
// free the needed space.
void SetCapacity(size_t capacity) override;
// Set the flag to reject insertion if cache if full.
void SetStrictCapacityLimit(bool strict_capacity_limit) override;
// Like Cache methods, but with an extra "hash" parameter.
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
Cache::DeleterFn deleter, Cache::Handle** handle,
Cache::Priority priority) override;
Status Insert(const Slice& key, uint32_t hash, void* value,
const Cache::CacheItemHelper* helper, size_t charge,
Cache::Handle** handle, Cache::Priority priority) override {
return Insert(key, hash, value, charge, helper->del_cb, handle, priority);
}
Cache::Handle* Lookup(const Slice& key, uint32_t hash,
const Cache::CacheItemHelper* /*helper*/,
const Cache::CreateCallback& /*create_cb*/,
Cache::Priority /*priority*/, bool /*wait*/,
Statistics* /*stats*/) override {
return Lookup(key, hash);
}
Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
bool Release(Cache::Handle* handle, bool /*useful*/,
bool erase_if_last_ref) override {
return Release(handle, erase_if_last_ref);
}
bool IsReady(Cache::Handle* /*handle*/) override { return true; }
void Wait(Cache::Handle* /*handle*/) override {}
bool Ref(Cache::Handle* handle) override;
bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
void Erase(const Slice& key, uint32_t hash) override;
size_t GetUsage() const override;
size_t GetPinnedUsage() const override;
void ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) override;
void EraseUnRefEntries() override;
std::string GetPrintableOptions() const override;
private:
friend class LRUCache;
// Insert an item into the hash table and, if handle is null, insert into
// the LRU list. Older items are evicted as necessary. If the cache is full
// and free_handle_on_fail is true, the item is deleted and handle is set to
// nullptr.
Status InsertItem(LRUHandle* item, Cache::Handle** handle,
bool free_handle_on_fail);
void LRU_Remove(LRUHandle* e);
void LRU_Insert(LRUHandle* e);
// Free some space following strict LRU policy until enough space
// to hold (usage_ + charge) is freed or the lru list is empty
// This function is not thread safe - it needs to be executed while
// holding the mutex_.
void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
// Initialized before use.
size_t capacity_;
// Whether to reject insertion if cache reaches its full capacity.
bool strict_capacity_limit_;
// Dummy head of LRU list.
// lru.prev is newest entry, lru.next is oldest entry.
// LRU contains items which can be evicted, ie reference only by cache
LRUHandle lru_;
// Pointer to head of low-pri pool in LRU list.
LRUHandle* lru_low_pri_;
// ------------^^^^^^^^^^^^^-----------
// Not frequently modified data members
// ------------------------------------
//
// We separate data members that are updated frequently from the ones that
// are not frequently updated so that they don't share the same cache line
// which will lead into false cache sharing
//
// ------------------------------------
// Frequently modified data members
// ------------vvvvvvvvvvvvv-----------
LRUHandleTable table_;
// Memory size for entries residing in the cache.
size_t usage_;
// Memory size for entries residing only in the LRU list.
size_t lru_usage_;
// mutex_ protects the following state.
// We don't count mutex_ as the cache's internal state so semantically we
// don't mind mutex_ invoking the non-const actions.
mutable port::Mutex mutex_;
};
class LRUCache
#ifdef NDEBUG
final
#endif
: public ShardedCache {
public:
LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata);
~LRUCache() override;
const char* Name() const override { return "LRUCache"; }
CacheShard* GetShard(uint32_t shard) override;
const CacheShard* GetShard(uint32_t shard) const override;
void* Value(Handle* handle) override;
size_t GetCharge(Handle* handle) const override;
uint32_t GetHash(Handle* handle) const override;
DeleterFn GetDeleter(Handle* handle) const override;
void DisownData() override;
private:
LRUCacheShard* shards_ = nullptr;
int num_shards_ = 0;
};
} // namespace fast_lru_cache
std::shared_ptr<Cache> NewFastLRUCache(
size_t capacity, int num_shard_bits = -1,
bool strict_capacity_limit = false,
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy);
} // namespace ROCKSDB_NAMESPACE

804
cache/lru_cache.cc vendored
View File

@ -1,804 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/lru_cache.h"
#include <cassert>
#include <cstdint>
#include <cstdio>
#include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h"
#include "port/lang.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
namespace lru_cache {
LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
: length_bits_(/* historical starting size*/ 4),
list_(new LRUHandle* [size_t{1} << length_bits_] {}),
elems_(0),
max_length_bits_(max_upper_hash_bits) {}
LRUHandleTable::~LRUHandleTable() {
ApplyToEntriesRange(
[](LRUHandle* h) {
if (!h->HasRefs()) {
h->Free();
}
},
0, uint32_t{1} << length_bits_);
}
LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
return *FindPointer(key, hash);
}
LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
LRUHandle** ptr = FindPointer(h->key(), h->hash);
LRUHandle* old = *ptr;
h->next_hash = (old == nullptr ? nullptr : old->next_hash);
*ptr = h;
if (old == nullptr) {
++elems_;
if ((elems_ >> length_bits_) > 0) { // elems_ >= length
// Since each cache entry is fairly large, we aim for a small
// average linked list length (<= 1).
Resize();
}
}
return old;
}
LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
LRUHandle** ptr = FindPointer(key, hash);
LRUHandle* result = *ptr;
if (result != nullptr) {
*ptr = result->next_hash;
--elems_;
}
return result;
}
LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
LRUHandle** ptr = &list_[hash >> (32 - length_bits_)];
while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
ptr = &(*ptr)->next_hash;
}
return ptr;
}
void LRUHandleTable::Resize() {
if (length_bits_ >= max_length_bits_) {
// Due to reaching limit of hash information, if we made the table bigger,
// we would allocate more addresses but only the same number would be used.
return;
}
if (length_bits_ >= 31) {
// Avoid undefined behavior shifting uint32_t by 32.
return;
}
uint32_t old_length = uint32_t{1} << length_bits_;
int new_length_bits = length_bits_ + 1;
std::unique_ptr<LRUHandle* []> new_list {
new LRUHandle* [size_t{1} << new_length_bits] {}
};
uint32_t count = 0;
for (uint32_t i = 0; i < old_length; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
LRUHandle* next = h->next_hash;
uint32_t hash = h->hash;
LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)];
h->next_hash = *ptr;
*ptr = h;
h = next;
count++;
}
}
assert(elems_ == count);
list_ = std::move(new_list);
length_bits_ = new_length_bits;
}
LRUCacheShard::LRUCacheShard(
size_t capacity, bool strict_capacity_limit, double high_pri_pool_ratio,
bool use_adaptive_mutex, CacheMetadataChargePolicy metadata_charge_policy,
int max_upper_hash_bits,
const std::shared_ptr<SecondaryCache>& secondary_cache)
: capacity_(0),
high_pri_pool_usage_(0),
strict_capacity_limit_(strict_capacity_limit),
high_pri_pool_ratio_(high_pri_pool_ratio),
high_pri_pool_capacity_(0),
table_(max_upper_hash_bits),
usage_(0),
lru_usage_(0),
mutex_(use_adaptive_mutex),
secondary_cache_(secondary_cache) {
set_metadata_charge_policy(metadata_charge_policy);
// Make empty circular linked list.
lru_.next = &lru_;
lru_.prev = &lru_;
lru_low_pri_ = &lru_;
SetCapacity(capacity);
}
void LRUCacheShard::EraseUnRefEntries() {
autovector<LRUHandle*> last_reference_list;
{
MutexLock l(&mutex_);
while (lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
last_reference_list.push_back(old);
}
}
for (auto entry : last_reference_list) {
entry->Free();
}
}
void LRUCacheShard::ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) {
// The state is essentially going to be the starting hash, which works
// nicely even if we resize between calls because we use upper-most
// hash bits for table indexes.
MutexLock l(&mutex_);
uint32_t length_bits = table_.GetLengthBits();
uint32_t length = uint32_t{1} << length_bits;
assert(average_entries_per_lock > 0);
// Assuming we are called with same average_entries_per_lock repeatedly,
// this simplifies some logic (index_end will not overflow).
assert(average_entries_per_lock < length || *state == 0);
uint32_t index_begin = *state >> (32 - length_bits);
uint32_t index_end = index_begin + average_entries_per_lock;
if (index_end >= length) {
// Going to end
index_end = length;
*state = UINT32_MAX;
} else {
*state = index_end << (32 - length_bits);
}
table_.ApplyToEntriesRange(
[callback](LRUHandle* h) {
DeleterFn deleter = h->IsSecondaryCacheCompatible()
? h->info_.helper->del_cb
: h->info_.deleter;
callback(h->key(), h->value, h->charge, deleter);
},
index_begin, index_end);
}
void LRUCacheShard::TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri) {
MutexLock l(&mutex_);
*lru = &lru_;
*lru_low_pri = lru_low_pri_;
}
size_t LRUCacheShard::TEST_GetLRUSize() {
MutexLock l(&mutex_);
LRUHandle* lru_handle = lru_.next;
size_t lru_size = 0;
while (lru_handle != &lru_) {
lru_size++;
lru_handle = lru_handle->next;
}
return lru_size;
}
double LRUCacheShard::GetHighPriPoolRatio() {
MutexLock l(&mutex_);
return high_pri_pool_ratio_;
}
void LRUCacheShard::LRU_Remove(LRUHandle* e) {
assert(e->next != nullptr);
assert(e->prev != nullptr);
if (lru_low_pri_ == e) {
lru_low_pri_ = e->prev;
}
e->next->prev = e->prev;
e->prev->next = e->next;
e->prev = e->next = nullptr;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(lru_usage_ >= total_charge);
lru_usage_ -= total_charge;
if (e->InHighPriPool()) {
assert(high_pri_pool_usage_ >= total_charge);
high_pri_pool_usage_ -= total_charge;
}
}
void LRUCacheShard::LRU_Insert(LRUHandle* e) {
assert(e->next == nullptr);
assert(e->prev == nullptr);
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
// Inset "e" to head of LRU list.
e->next = &lru_;
e->prev = lru_.prev;
e->prev->next = e;
e->next->prev = e;
e->SetInHighPriPool(true);
high_pri_pool_usage_ += total_charge;
MaintainPoolSize();
} else {
// Insert "e" to the head of low-pri pool. Note that when
// high_pri_pool_ratio is 0, head of low-pri pool is also head of LRU list.
e->next = lru_low_pri_->next;
e->prev = lru_low_pri_;
e->prev->next = e;
e->next->prev = e;
e->SetInHighPriPool(false);
lru_low_pri_ = e;
}
lru_usage_ += total_charge;
}
void LRUCacheShard::MaintainPoolSize() {
while (high_pri_pool_usage_ > high_pri_pool_capacity_) {
// Overflow last entry in high-pri pool to low-pri pool.
lru_low_pri_ = lru_low_pri_->next;
assert(lru_low_pri_ != &lru_);
lru_low_pri_->SetInHighPriPool(false);
size_t total_charge =
lru_low_pri_->CalcTotalCharge(metadata_charge_policy_);
assert(high_pri_pool_usage_ >= total_charge);
high_pri_pool_usage_ -= total_charge;
}
}
void LRUCacheShard::EvictFromLRU(size_t charge,
autovector<LRUHandle*>* deleted) {
while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next;
// LRU list contains only elements which can be evicted.
assert(old->InCache() && !old->HasRefs());
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
deleted->push_back(old);
}
}
void LRUCacheShard::SetCapacity(size_t capacity) {
autovector<LRUHandle*> last_reference_list;
{
MutexLock l(&mutex_);
capacity_ = capacity;
high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
EvictFromLRU(0, &last_reference_list);
}
// Try to insert the evicted entries into tiered cache.
// Free the entries outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
if (secondary_cache_ && entry->IsSecondaryCacheCompatible() &&
!entry->IsInSecondaryCache()) {
secondary_cache_->Insert(entry->key(), entry->value, entry->info_.helper)
.PermitUncheckedError();
}
entry->Free();
}
}
void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
MutexLock l(&mutex_);
strict_capacity_limit_ = strict_capacity_limit;
}
Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
bool free_handle_on_fail) {
Status s = Status::OK();
autovector<LRUHandle*> last_reference_list;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
{
MutexLock l(&mutex_);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty.
EvictFromLRU(total_charge, &last_reference_list);
if ((usage_ + total_charge) > capacity_ &&
(strict_capacity_limit_ || handle == nullptr)) {
e->SetInCache(false);
if (handle == nullptr) {
// Don't insert the entry but still return ok, as if the entry inserted
// into cache and get evicted immediately.
last_reference_list.push_back(e);
} else {
if (free_handle_on_fail) {
delete[] reinterpret_cast<char*>(e);
*handle = nullptr;
}
s = Status::Incomplete("Insert failed due to LRU cache being full.");
}
} else {
// Insert into the cache. Note that the cache might get larger than its
// capacity if not enough space was freed up.
LRUHandle* old = table_.Insert(e);
usage_ += total_charge;
if (old != nullptr) {
s = Status::OkOverwritten();
assert(old->InCache());
old->SetInCache(false);
if (!old->HasRefs()) {
// old is on LRU because it's in cache and its reference count is 0.
LRU_Remove(old);
size_t old_total_charge =
old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
last_reference_list.push_back(old);
}
}
if (handle == nullptr) {
LRU_Insert(e);
} else {
// If caller already holds a ref, no need to take one here.
if (!e->HasRefs()) {
e->Ref();
}
*handle = reinterpret_cast<Cache::Handle*>(e);
}
}
}
// Try to insert the evicted entries into the secondary cache.
// Free the entries here outside of mutex for performance reasons.
for (auto entry : last_reference_list) {
if (secondary_cache_ && entry->IsSecondaryCacheCompatible() &&
!entry->IsInSecondaryCache()) {
secondary_cache_->Insert(entry->key(), entry->value, entry->info_.helper)
.PermitUncheckedError();
}
entry->Free();
}
return s;
}
void LRUCacheShard::Promote(LRUHandle* e) {
SecondaryCacheResultHandle* secondary_handle = e->sec_handle;
assert(secondary_handle->IsReady());
e->SetIncomplete(false);
e->SetInCache(true);
e->value = secondary_handle->Value();
e->charge = secondary_handle->Size();
delete secondary_handle;
// This call could fail if the cache is over capacity and
// strict_capacity_limit_ is true. In such a case, we don't want
// InsertItem() to free the handle, since the item is already in memory
// and the caller will most likely just read from disk if we erase it here.
if (e->value) {
Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(e);
Status s = InsertItem(e, &handle, /*free_handle_on_fail=*/false);
if (!s.ok()) {
// Item is in memory, but not accounted against the cache capacity.
// When the handle is released, the item should get deleted.
assert(!e->InCache());
}
} else {
// Since the secondary cache lookup failed, mark the item as not in cache
// Don't charge the cache as its only metadata that'll shortly be released
MutexLock l(&mutex_);
e->charge = 0;
e->SetInCache(false);
}
}
Cache::Handle* LRUCacheShard::Lookup(
const Slice& key, uint32_t hash,
const ShardedCache::CacheItemHelper* helper,
const ShardedCache::CreateCallback& create_cb, Cache::Priority priority,
bool wait, Statistics* stats) {
LRUHandle* e = nullptr;
{
MutexLock l(&mutex_);
e = table_.Lookup(key, hash);
if (e != nullptr) {
assert(e->InCache());
if (!e->HasRefs()) {
// The entry is in LRU since it's in hash and has no external references
LRU_Remove(e);
}
e->Ref();
e->SetHit();
}
}
// If handle table lookup failed, then allocate a handle outside the
// mutex if we're going to lookup in the secondary cache.
// Only support synchronous for now.
// TODO: Support asynchronous lookup in secondary cache
if (!e && secondary_cache_ && helper && helper->saveto_cb) {
// For objects from the secondary cache, we expect the caller to provide
// a way to create/delete the primary cache object. The only case where
// a deleter would not be required is for dummy entries inserted for
// accounting purposes, which we won't demote to the secondary cache
// anyway.
assert(create_cb && helper->del_cb);
bool is_in_sec_cache{false};
std::unique_ptr<SecondaryCacheResultHandle> secondary_handle =
secondary_cache_->Lookup(key, create_cb, wait, is_in_sec_cache);
if (secondary_handle != nullptr) {
e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
e->flags = 0;
e->SetSecondaryCacheCompatible(true);
e->info_.helper = helper;
e->key_length = key.size();
e->hash = hash;
e->refs = 0;
e->next = e->prev = nullptr;
e->SetPriority(priority);
memcpy(e->key_data, key.data(), key.size());
e->value = nullptr;
e->sec_handle = secondary_handle.release();
e->Ref();
if (wait) {
Promote(e);
e->SetIsInSecondaryCache(is_in_sec_cache);
if (!e->value) {
// The secondary cache returned a handle, but the lookup failed.
e->Unref();
e->Free();
e = nullptr;
} else {
PERF_COUNTER_ADD(secondary_cache_hit_count, 1);
RecordTick(stats, SECONDARY_CACHE_HITS);
}
} else {
// If wait is false, we always return a handle and let the caller
// release the handle after checking for success or failure.
e->SetIncomplete(true);
e->SetIsInSecondaryCache(is_in_sec_cache);
// This may be slightly inaccurate, if the lookup eventually fails.
// But the probability is very low.
PERF_COUNTER_ADD(secondary_cache_hit_count, 1);
RecordTick(stats, SECONDARY_CACHE_HITS);
}
}
}
return reinterpret_cast<Cache::Handle*>(e);
}
bool LRUCacheShard::Ref(Cache::Handle* h) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
MutexLock l(&mutex_);
// To create another reference - entry must be already externally referenced.
assert(e->HasRefs());
e->Ref();
return true;
}
void LRUCacheShard::SetHighPriorityPoolRatio(double high_pri_pool_ratio) {
MutexLock l(&mutex_);
high_pri_pool_ratio_ = high_pri_pool_ratio;
high_pri_pool_capacity_ = capacity_ * high_pri_pool_ratio_;
MaintainPoolSize();
}
bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
if (handle == nullptr) {
return false;
}
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
bool last_reference = false;
{
MutexLock l(&mutex_);
last_reference = e->Unref();
if (last_reference && e->InCache()) {
// The item is still in cache, and nobody else holds a reference to it.
if (usage_ > capacity_ || erase_if_last_ref) {
// The LRU list must be empty since the cache is full.
assert(lru_.next == &lru_ || erase_if_last_ref);
// Take this opportunity and remove the item.
table_.Remove(e->key(), e->hash);
e->SetInCache(false);
} else {
// Put the item back on the LRU list, and don't free it.
LRU_Insert(e);
last_reference = false;
}
}
// If it was the last reference, and the entry is either not secondary
// cache compatible (i.e a dummy entry for accounting), or is secondary
// cache compatible and has a non-null value, then decrement the cache
// usage. If value is null in the latter case, taht means the lookup
// failed and we didn't charge the cache.
if (last_reference && (!e->IsSecondaryCacheCompatible() || e->value)) {
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
}
}
// Free the entry here outside of mutex for performance reasons.
if (last_reference) {
e->Free();
}
return last_reference;
}
Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
size_t charge,
void (*deleter)(const Slice& key, void* value),
const Cache::CacheItemHelper* helper,
Cache::Handle** handle, Cache::Priority priority) {
// Allocate the memory here outside of the mutex.
// If the cache is full, we'll have to release it.
// It shouldn't happen very often though.
LRUHandle* e = reinterpret_cast<LRUHandle*>(
new char[sizeof(LRUHandle) - 1 + key.size()]);
e->value = value;
e->flags = 0;
if (helper) {
e->SetSecondaryCacheCompatible(true);
e->info_.helper = helper;
} else {
#ifdef __SANITIZE_THREAD__
e->is_secondary_cache_compatible_for_tsan = false;
#endif // __SANITIZE_THREAD__
e->info_.deleter = deleter;
}
e->charge = charge;
e->key_length = key.size();
e->hash = hash;
e->refs = 0;
e->next = e->prev = nullptr;
e->SetInCache(true);
e->SetPriority(priority);
memcpy(e->key_data, key.data(), key.size());
return InsertItem(e, handle, /* free_handle_on_fail */ true);
}
void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
LRUHandle* e;
bool last_reference = false;
{
MutexLock l(&mutex_);
e = table_.Remove(key, hash);
if (e != nullptr) {
assert(e->InCache());
e->SetInCache(false);
if (!e->HasRefs()) {
// The entry is in LRU since it's in hash and has no external references
LRU_Remove(e);
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
last_reference = true;
}
}
}
// Free the entry here outside of mutex for performance reasons.
// last_reference will only be true if e != nullptr.
if (last_reference) {
e->Free();
}
}
bool LRUCacheShard::IsReady(Cache::Handle* handle) {
LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
MutexLock l(&mutex_);
bool ready = true;
if (e->IsPending()) {
assert(secondary_cache_);
assert(e->sec_handle);
ready = e->sec_handle->IsReady();
}
return ready;
}
size_t LRUCacheShard::GetUsage() const {
MutexLock l(&mutex_);
return usage_;
}
size_t LRUCacheShard::GetPinnedUsage() const {
MutexLock l(&mutex_);
assert(usage_ >= lru_usage_);
return usage_ - lru_usage_;
}
std::string LRUCacheShard::GetPrintableOptions() const {
const int kBufferSize = 200;
char buffer[kBufferSize];
{
MutexLock l(&mutex_);
snprintf(buffer, kBufferSize, " high_pri_pool_ratio: %.3lf\n",
high_pri_pool_ratio_);
}
return std::string(buffer);
}
LRUCache::LRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit, double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> allocator,
bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
const std::shared_ptr<SecondaryCache>& secondary_cache)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit,
std::move(allocator)) {
num_shards_ = 1 << num_shard_bits;
shards_ = reinterpret_cast<LRUCacheShard*>(
port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
for (int i = 0; i < num_shards_; i++) {
new (&shards_[i]) LRUCacheShard(
per_shard, strict_capacity_limit, high_pri_pool_ratio,
use_adaptive_mutex, metadata_charge_policy,
/* max_upper_hash_bits */ 32 - num_shard_bits, secondary_cache);
}
secondary_cache_ = secondary_cache;
}
LRUCache::~LRUCache() {
if (shards_ != nullptr) {
assert(num_shards_ > 0);
for (int i = 0; i < num_shards_; i++) {
shards_[i].~LRUCacheShard();
}
port::cacheline_aligned_free(shards_);
}
}
CacheShard* LRUCache::GetShard(uint32_t shard) {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
const CacheShard* LRUCache::GetShard(uint32_t shard) const {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
void* LRUCache::Value(Handle* handle) {
return reinterpret_cast<const LRUHandle*>(handle)->value;
}
size_t LRUCache::GetCharge(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->charge;
}
Cache::DeleterFn LRUCache::GetDeleter(Handle* handle) const {
auto h = reinterpret_cast<const LRUHandle*>(handle);
if (h->IsSecondaryCacheCompatible()) {
return h->info_.helper->del_cb;
} else {
return h->info_.deleter;
}
}
uint32_t LRUCache::GetHash(Handle* handle) const {
return reinterpret_cast<const LRUHandle*>(handle)->hash;
}
void LRUCache::DisownData() {
// Leak data only if that won't generate an ASAN/valgrind warning.
if (!kMustFreeHeapAllocations) {
shards_ = nullptr;
num_shards_ = 0;
}
}
size_t LRUCache::TEST_GetLRUSize() {
size_t lru_size_of_all_shards = 0;
for (int i = 0; i < num_shards_; i++) {
lru_size_of_all_shards += shards_[i].TEST_GetLRUSize();
}
return lru_size_of_all_shards;
}
double LRUCache::GetHighPriPoolRatio() {
double result = 0.0;
if (num_shards_ > 0) {
result = shards_[0].GetHighPriPoolRatio();
}
return result;
}
void LRUCache::WaitAll(std::vector<Handle*>& handles) {
if (secondary_cache_) {
std::vector<SecondaryCacheResultHandle*> sec_handles;
sec_handles.reserve(handles.size());
for (Handle* handle : handles) {
if (!handle) {
continue;
}
LRUHandle* lru_handle = reinterpret_cast<LRUHandle*>(handle);
if (!lru_handle->IsPending()) {
continue;
}
sec_handles.emplace_back(lru_handle->sec_handle);
}
secondary_cache_->WaitAll(sec_handles);
for (Handle* handle : handles) {
if (!handle) {
continue;
}
LRUHandle* lru_handle = reinterpret_cast<LRUHandle*>(handle);
if (!lru_handle->IsPending()) {
continue;
}
uint32_t hash = GetHash(handle);
LRUCacheShard* shard = static_cast<LRUCacheShard*>(GetShard(Shard(hash)));
shard->Promote(lru_handle);
}
}
}
} // namespace lru_cache
std::shared_ptr<Cache> NewLRUCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
const std::shared_ptr<SecondaryCache>& secondary_cache) {
if (num_shard_bits >= 20) {
return nullptr; // The cache cannot be sharded into too many fine pieces.
}
if (high_pri_pool_ratio < 0.0 || high_pri_pool_ratio > 1.0) {
// Invalid high_pri_pool_ratio
return nullptr;
}
if (num_shard_bits < 0) {
num_shard_bits = GetDefaultCacheShardBits(capacity);
}
return std::make_shared<LRUCache>(
capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
std::move(memory_allocator), use_adaptive_mutex, metadata_charge_policy,
secondary_cache);
}
std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
return NewLRUCache(
cache_opts.capacity, cache_opts.num_shard_bits,
cache_opts.strict_capacity_limit, cache_opts.high_pri_pool_ratio,
cache_opts.memory_allocator, cache_opts.use_adaptive_mutex,
cache_opts.metadata_charge_policy, cache_opts.secondary_cache);
}
std::shared_ptr<Cache> NewLRUCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy) {
return NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
high_pri_pool_ratio, memory_allocator, use_adaptive_mutex,
metadata_charge_policy, nullptr);
}
} // namespace ROCKSDB_NAMESPACE

489
cache/lru_cache.h vendored
View File

@ -1,489 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <memory>
#include <string>
#include "cache/sharded_cache.h"
#include "port/lang.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/secondary_cache.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
namespace lru_cache {
// LRU cache implementation. This class is not thread-safe.
// An entry is a variable length heap-allocated structure.
// Entries are referenced by cache and/or by any external entity.
// The cache keeps all its entries in a hash table. Some elements
// are also stored on LRU list.
//
// LRUHandle can be in these states:
// 1. Referenced externally AND in hash table.
// In that case the entry is *not* in the LRU list
// (refs >= 1 && in_cache == true)
// 2. Not referenced externally AND in hash table.
// In that case the entry is in the LRU list and can be freed.
// (refs == 0 && in_cache == true)
// 3. Referenced externally AND not in hash table.
// In that case the entry is not in the LRU list and not in hash table.
// The entry can be freed when refs becomes 0.
// (refs >= 1 && in_cache == false)
//
// All newly created LRUHandles are in state 1. If you call
// LRUCacheShard::Release on entry in state 1, it will go into state 2.
// To move from state 1 to state 3, either call LRUCacheShard::Erase or
// LRUCacheShard::Insert with the same key (but possibly different value).
// To move from state 2 to state 1, use LRUCacheShard::Lookup.
// Before destruction, make sure that no handles are in state 1. This means
// that any successful LRUCacheShard::Lookup/LRUCacheShard::Insert have a
// matching LRUCache::Release (to move into state 2) or LRUCacheShard::Erase
// (to move into state 3).
struct LRUHandle {
void* value;
union Info {
Info() {}
~Info() {}
Cache::DeleterFn deleter;
const ShardedCache::CacheItemHelper* helper;
} info_;
// An entry is not added to the LRUHandleTable until the secondary cache
// lookup is complete, so its safe to have this union.
union {
LRUHandle* next_hash;
SecondaryCacheResultHandle* sec_handle;
};
LRUHandle* next;
LRUHandle* prev;
size_t charge; // TODO(opt): Only allow uint32_t?
size_t key_length;
// The hash of key(). Used for fast sharding and comparisons.
uint32_t hash;
// The number of external refs to this entry. The cache itself is not counted.
uint32_t refs;
enum Flags : uint8_t {
// Whether this entry is referenced by the hash table.
IN_CACHE = (1 << 0),
// Whether this entry is high priority entry.
IS_HIGH_PRI = (1 << 1),
// Whether this entry is in high-pri pool.
IN_HIGH_PRI_POOL = (1 << 2),
// Whether this entry has had any lookups (hits).
HAS_HIT = (1 << 3),
// Can this be inserted into the secondary cache.
IS_SECONDARY_CACHE_COMPATIBLE = (1 << 4),
// Is the handle still being read from a lower tier.
IS_PENDING = (1 << 5),
// Whether this handle is still in a lower tier
IS_IN_SECONDARY_CACHE = (1 << 6),
};
uint8_t flags;
#ifdef __SANITIZE_THREAD__
// TSAN can report a false data race on flags, where one thread is writing
// to one of the mutable bits and another thread is reading this immutable
// bit. So precisely suppress that TSAN warning, we separate out this bit
// during TSAN runs.
bool is_secondary_cache_compatible_for_tsan;
#endif // __SANITIZE_THREAD__
// Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
char key_data[1];
Slice key() const { return Slice(key_data, key_length); }
// Increase the reference count by 1.
void Ref() { refs++; }
// Just reduce the reference count by 1. Return true if it was last reference.
bool Unref() {
assert(refs > 0);
refs--;
return refs == 0;
}
// Return true if there are external refs, false otherwise.
bool HasRefs() const { return refs > 0; }
bool InCache() const { return flags & IN_CACHE; }
bool IsHighPri() const { return flags & IS_HIGH_PRI; }
bool InHighPriPool() const { return flags & IN_HIGH_PRI_POOL; }
bool HasHit() const { return flags & HAS_HIT; }
bool IsSecondaryCacheCompatible() const {
#ifdef __SANITIZE_THREAD__
return is_secondary_cache_compatible_for_tsan;
#else
return flags & IS_SECONDARY_CACHE_COMPATIBLE;
#endif // __SANITIZE_THREAD__
}
bool IsPending() const { return flags & IS_PENDING; }
bool IsInSecondaryCache() const { return flags & IS_IN_SECONDARY_CACHE; }
void SetInCache(bool in_cache) {
if (in_cache) {
flags |= IN_CACHE;
} else {
flags &= ~IN_CACHE;
}
}
void SetPriority(Cache::Priority priority) {
if (priority == Cache::Priority::HIGH) {
flags |= IS_HIGH_PRI;
} else {
flags &= ~IS_HIGH_PRI;
}
}
void SetInHighPriPool(bool in_high_pri_pool) {
if (in_high_pri_pool) {
flags |= IN_HIGH_PRI_POOL;
} else {
flags &= ~IN_HIGH_PRI_POOL;
}
}
void SetHit() { flags |= HAS_HIT; }
void SetSecondaryCacheCompatible(bool compat) {
if (compat) {
flags |= IS_SECONDARY_CACHE_COMPATIBLE;
} else {
flags &= ~IS_SECONDARY_CACHE_COMPATIBLE;
}
#ifdef __SANITIZE_THREAD__
is_secondary_cache_compatible_for_tsan = compat;
#endif // __SANITIZE_THREAD__
}
void SetIncomplete(bool incomp) {
if (incomp) {
flags |= IS_PENDING;
} else {
flags &= ~IS_PENDING;
}
}
void SetIsInSecondaryCache(bool is_in_secondary_cache) {
if (is_in_secondary_cache) {
flags |= IS_IN_SECONDARY_CACHE;
} else {
flags &= ~IS_IN_SECONDARY_CACHE;
}
}
void Free() {
assert(refs == 0);
#ifdef __SANITIZE_THREAD__
// Here we can safely assert they are the same without a data race reported
assert(((flags & IS_SECONDARY_CACHE_COMPATIBLE) != 0) ==
is_secondary_cache_compatible_for_tsan);
#endif // __SANITIZE_THREAD__
if (!IsSecondaryCacheCompatible() && info_.deleter) {
(*info_.deleter)(key(), value);
} else if (IsSecondaryCacheCompatible()) {
if (IsPending()) {
assert(sec_handle != nullptr);
SecondaryCacheResultHandle* tmp_sec_handle = sec_handle;
tmp_sec_handle->Wait();
value = tmp_sec_handle->Value();
delete tmp_sec_handle;
}
if (value) {
(*info_.helper->del_cb)(key(), value);
}
}
delete[] reinterpret_cast<char*>(this);
}
// Calculate the memory usage by metadata.
inline size_t CalcTotalCharge(
CacheMetadataChargePolicy metadata_charge_policy) {
size_t meta_charge = 0;
if (metadata_charge_policy == kFullChargeCacheMetadata) {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
meta_charge += malloc_usable_size(static_cast<void*>(this));
#else
// This is the size that is used when a new handle is created.
meta_charge += sizeof(LRUHandle) - 1 + key_length;
#endif
}
return charge + meta_charge;
}
};
// We provide our own simple hash table since it removes a whole bunch
// of porting hacks and is also faster than some of the built-in hash
// table implementations in some of the compiler/runtime combinations
// we have tested. E.g., readrandom speeds up by ~5% over the g++
// 4.4.3's builtin hashtable.
class LRUHandleTable {
public:
// If the table uses more hash bits than `max_upper_hash_bits`,
// it will eat into the bits used for sharding, which are constant
// for a given LRUHandleTable.
explicit LRUHandleTable(int max_upper_hash_bits);
~LRUHandleTable();
LRUHandle* Lookup(const Slice& key, uint32_t hash);
LRUHandle* Insert(LRUHandle* h);
LRUHandle* Remove(const Slice& key, uint32_t hash);
template <typename T>
void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) {
for (uint32_t i = index_begin; i < index_end; i++) {
LRUHandle* h = list_[i];
while (h != nullptr) {
auto n = h->next_hash;
assert(h->InCache());
func(h);
h = n;
}
}
}
int GetLengthBits() const { return length_bits_; }
private:
// Return a pointer to slot that points to a cache entry that
// matches key/hash. If there is no such cache entry, return a
// pointer to the trailing slot in the corresponding linked list.
LRUHandle** FindPointer(const Slice& key, uint32_t hash);
void Resize();
// Number of hash bits (upper because lower bits used for sharding)
// used for table index. Length == 1 << length_bits_
int length_bits_;
// The table consists of an array of buckets where each bucket is
// a linked list of cache entries that hash into the bucket.
std::unique_ptr<LRUHandle*[]> list_;
// Number of elements currently in the table.
uint32_t elems_;
// Set from max_upper_hash_bits (see constructor).
const int max_length_bits_;
};
// A single shard of sharded cache.
class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
public:
LRUCacheShard(size_t capacity, bool strict_capacity_limit,
double high_pri_pool_ratio, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy,
int max_upper_hash_bits,
const std::shared_ptr<SecondaryCache>& secondary_cache);
virtual ~LRUCacheShard() override = default;
// Separate from constructor so caller can easily make an array of LRUCache
// if current usage is more than new capacity, the function will attempt to
// free the needed space.
virtual void SetCapacity(size_t capacity) override;
// Set the flag to reject insertion if cache if full.
virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;
// Set percentage of capacity reserved for high-pri cache entries.
void SetHighPriorityPoolRatio(double high_pri_pool_ratio);
// Like Cache methods, but with an extra "hash" parameter.
virtual Status Insert(const Slice& key, uint32_t hash, void* value,
size_t charge, Cache::DeleterFn deleter,
Cache::Handle** handle,
Cache::Priority priority) override {
return Insert(key, hash, value, charge, deleter, nullptr, handle, priority);
}
virtual Status Insert(const Slice& key, uint32_t hash, void* value,
const Cache::CacheItemHelper* helper, size_t charge,
Cache::Handle** handle,
Cache::Priority priority) override {
assert(helper);
return Insert(key, hash, value, charge, nullptr, helper, handle, priority);
}
// If helper_cb is null, the values of the following arguments don't matter.
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash,
const ShardedCache::CacheItemHelper* helper,
const ShardedCache::CreateCallback& create_cb,
ShardedCache::Priority priority, bool wait,
Statistics* stats) override;
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) override {
return Lookup(key, hash, nullptr, nullptr, Cache::Priority::LOW, true,
nullptr);
}
virtual bool Release(Cache::Handle* handle, bool /*useful*/,
bool erase_if_last_ref) override {
return Release(handle, erase_if_last_ref);
}
virtual bool IsReady(Cache::Handle* /*handle*/) override;
virtual void Wait(Cache::Handle* /*handle*/) override {}
virtual bool Ref(Cache::Handle* handle) override;
virtual bool Release(Cache::Handle* handle,
bool erase_if_last_ref = false) override;
virtual void Erase(const Slice& key, uint32_t hash) override;
// Although in some platforms the update of size_t is atomic, to make sure
// GetUsage() and GetPinnedUsage() work correctly under any platform, we'll
// protect them with mutex_.
virtual size_t GetUsage() const override;
virtual size_t GetPinnedUsage() const override;
virtual void ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) override;
virtual void EraseUnRefEntries() override;
virtual std::string GetPrintableOptions() const override;
void TEST_GetLRUList(LRUHandle** lru, LRUHandle** lru_low_pri);
// Retrieves number of elements in LRU, for unit test purpose only.
// Not threadsafe.
size_t TEST_GetLRUSize();
// Retrieves high pri pool ratio
double GetHighPriPoolRatio();
private:
friend class LRUCache;
// Insert an item into the hash table and, if handle is null, insert into
// the LRU list. Older items are evicted as necessary. If the cache is full
// and free_handle_on_fail is true, the item is deleted and handle is set to
// nullptr.
Status InsertItem(LRUHandle* item, Cache::Handle** handle,
bool free_handle_on_fail);
Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
DeleterFn deleter, const Cache::CacheItemHelper* helper,
Cache::Handle** handle, Cache::Priority priority);
// Promote an item looked up from the secondary cache to the LRU cache.
// The item may be still in the secondary cache.
// It is only inserted into the hash table and not the LRU list, and only
// if the cache is not at full capacity, as is the case during Insert. The
// caller should hold a reference on the LRUHandle. When the caller releases
// the last reference, the item is added to the LRU list.
// The item is promoted to the high pri or low pri pool as specified by the
// caller in Lookup.
void Promote(LRUHandle* e);
void LRU_Remove(LRUHandle* e);
void LRU_Insert(LRUHandle* e);
// Overflow the last entry in high-pri pool to low-pri pool until size of
// high-pri pool is no larger than the size specify by high_pri_pool_pct.
void MaintainPoolSize();
// Free some space following strict LRU policy until enough space
// to hold (usage_ + charge) is freed or the lru list is empty
// This function is not thread safe - it needs to be executed while
// holding the mutex_.
void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
// Initialized before use.
size_t capacity_;
// Memory size for entries in high-pri pool.
size_t high_pri_pool_usage_;
// Whether to reject insertion if cache reaches its full capacity.
bool strict_capacity_limit_;
// Ratio of capacity reserved for high priority cache entries.
double high_pri_pool_ratio_;
// High-pri pool size, equals to capacity * high_pri_pool_ratio.
// Remember the value to avoid recomputing each time.
double high_pri_pool_capacity_;
// Dummy head of LRU list.
// lru.prev is newest entry, lru.next is oldest entry.
// LRU contains items which can be evicted, ie reference only by cache
LRUHandle lru_;
// Pointer to head of low-pri pool in LRU list.
LRUHandle* lru_low_pri_;
// ------------^^^^^^^^^^^^^-----------
// Not frequently modified data members
// ------------------------------------
//
// We separate data members that are updated frequently from the ones that
// are not frequently updated so that they don't share the same cache line
// which will lead into false cache sharing
//
// ------------------------------------
// Frequently modified data members
// ------------vvvvvvvvvvvvv-----------
LRUHandleTable table_;
// Memory size for entries residing in the cache.
size_t usage_;
// Memory size for entries residing only in the LRU list.
size_t lru_usage_;
// mutex_ protects the following state.
// We don't count mutex_ as the cache's internal state so semantically we
// don't mind mutex_ invoking the non-const actions.
mutable port::Mutex mutex_;
std::shared_ptr<SecondaryCache> secondary_cache_;
};
class LRUCache
#ifdef NDEBUG
final
#endif
: public ShardedCache {
public:
LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata,
const std::shared_ptr<SecondaryCache>& secondary_cache = nullptr);
virtual ~LRUCache();
virtual const char* Name() const override { return "LRUCache"; }
virtual CacheShard* GetShard(uint32_t shard) override;
virtual const CacheShard* GetShard(uint32_t shard) const override;
virtual void* Value(Handle* handle) override;
virtual size_t GetCharge(Handle* handle) const override;
virtual uint32_t GetHash(Handle* handle) const override;
virtual DeleterFn GetDeleter(Handle* handle) const override;
virtual void DisownData() override;
virtual void WaitAll(std::vector<Handle*>& handles) override;
// Retrieves number of elements in LRU, for unit test purpose only.
size_t TEST_GetLRUSize();
// Retrieves high pri pool ratio.
double GetHighPriPoolRatio();
private:
LRUCacheShard* shards_ = nullptr;
int num_shards_ = 0;
std::shared_ptr<SecondaryCache> secondary_cache_;
};
} // namespace lru_cache
using LRUCache = lru_cache::LRUCache;
using LRUHandle = lru_cache::LRUHandle;
using LRUCacheShard = lru_cache::LRUCacheShard;
} // namespace ROCKSDB_NAMESPACE

1855
cache/lru_cache_test.cc vendored

File diff suppressed because it is too large Load Diff

233
cache/sharded_cache.cc vendored
View File

@ -1,233 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "cache/sharded_cache.h"
#include <algorithm>
#include <cstdint>
#include <memory>
#include "util/hash.h"
#include "util/math.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
namespace {
inline uint32_t HashSlice(const Slice& s) {
return Lower32of64(GetSliceNPHash64(s));
}
} // namespace
ShardedCache::ShardedCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit,
std::shared_ptr<MemoryAllocator> allocator)
: Cache(std::move(allocator)),
shard_mask_((uint32_t{1} << num_shard_bits) - 1),
capacity_(capacity),
strict_capacity_limit_(strict_capacity_limit),
last_id_(1) {}
void ShardedCache::SetCapacity(size_t capacity) {
uint32_t num_shards = GetNumShards();
const size_t per_shard = (capacity + (num_shards - 1)) / num_shards;
MutexLock l(&capacity_mutex_);
for (uint32_t s = 0; s < num_shards; s++) {
GetShard(s)->SetCapacity(per_shard);
}
capacity_ = capacity;
}
void ShardedCache::SetStrictCapacityLimit(bool strict_capacity_limit) {
uint32_t num_shards = GetNumShards();
MutexLock l(&capacity_mutex_);
for (uint32_t s = 0; s < num_shards; s++) {
GetShard(s)->SetStrictCapacityLimit(strict_capacity_limit);
}
strict_capacity_limit_ = strict_capacity_limit;
}
Status ShardedCache::Insert(const Slice& key, void* value, size_t charge,
DeleterFn deleter, Handle** handle,
Priority priority) {
uint32_t hash = HashSlice(key);
return GetShard(Shard(hash))
->Insert(key, hash, value, charge, deleter, handle, priority);
}
Status ShardedCache::Insert(const Slice& key, void* value,
const CacheItemHelper* helper, size_t charge,
Handle** handle, Priority priority) {
uint32_t hash = HashSlice(key);
if (!helper) {
return Status::InvalidArgument();
}
return GetShard(Shard(hash))
->Insert(key, hash, value, helper, charge, handle, priority);
}
Cache::Handle* ShardedCache::Lookup(const Slice& key, Statistics* /*stats*/) {
uint32_t hash = HashSlice(key);
return GetShard(Shard(hash))->Lookup(key, hash);
}
Cache::Handle* ShardedCache::Lookup(const Slice& key,
const CacheItemHelper* helper,
const CreateCallback& create_cb,
Priority priority, bool wait,
Statistics* stats) {
uint32_t hash = HashSlice(key);
return GetShard(Shard(hash))
->Lookup(key, hash, helper, create_cb, priority, wait, stats);
}
bool ShardedCache::IsReady(Handle* handle) {
uint32_t hash = GetHash(handle);
return GetShard(Shard(hash))->IsReady(handle);
}
void ShardedCache::Wait(Handle* handle) {
uint32_t hash = GetHash(handle);
GetShard(Shard(hash))->Wait(handle);
}
bool ShardedCache::Ref(Handle* handle) {
uint32_t hash = GetHash(handle);
return GetShard(Shard(hash))->Ref(handle);
}
bool ShardedCache::Release(Handle* handle, bool erase_if_last_ref) {
uint32_t hash = GetHash(handle);
return GetShard(Shard(hash))->Release(handle, erase_if_last_ref);
}
bool ShardedCache::Release(Handle* handle, bool useful,
bool erase_if_last_ref) {
uint32_t hash = GetHash(handle);
return GetShard(Shard(hash))->Release(handle, useful, erase_if_last_ref);
}
void ShardedCache::Erase(const Slice& key) {
uint32_t hash = HashSlice(key);
GetShard(Shard(hash))->Erase(key, hash);
}
uint64_t ShardedCache::NewId() {
return last_id_.fetch_add(1, std::memory_order_relaxed);
}
size_t ShardedCache::GetCapacity() const {
MutexLock l(&capacity_mutex_);
return capacity_;
}
bool ShardedCache::HasStrictCapacityLimit() const {
MutexLock l(&capacity_mutex_);
return strict_capacity_limit_;
}
size_t ShardedCache::GetUsage() const {
// We will not lock the cache when getting the usage from shards.
uint32_t num_shards = GetNumShards();
size_t usage = 0;
for (uint32_t s = 0; s < num_shards; s++) {
usage += GetShard(s)->GetUsage();
}
return usage;
}
size_t ShardedCache::GetUsage(Handle* handle) const {
return GetCharge(handle);
}
size_t ShardedCache::GetPinnedUsage() const {
// We will not lock the cache when getting the usage from shards.
uint32_t num_shards = GetNumShards();
size_t usage = 0;
for (uint32_t s = 0; s < num_shards; s++) {
usage += GetShard(s)->GetPinnedUsage();
}
return usage;
}
void ShardedCache::ApplyToAllEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
const ApplyToAllEntriesOptions& opts) {
uint32_t num_shards = GetNumShards();
// Iterate over part of each shard, rotating between shards, to
// minimize impact on latency of concurrent operations.
std::unique_ptr<uint32_t[]> states(new uint32_t[num_shards]{});
uint32_t aepl_in_32 = static_cast<uint32_t>(
std::min(size_t{UINT32_MAX}, opts.average_entries_per_lock));
aepl_in_32 = std::min(aepl_in_32, uint32_t{1});
bool remaining_work;
do {
remaining_work = false;
for (uint32_t s = 0; s < num_shards; s++) {
if (states[s] != UINT32_MAX) {
GetShard(s)->ApplyToSomeEntries(callback, aepl_in_32, &states[s]);
remaining_work |= states[s] != UINT32_MAX;
}
}
} while (remaining_work);
}
void ShardedCache::EraseUnRefEntries() {
uint32_t num_shards = GetNumShards();
for (uint32_t s = 0; s < num_shards; s++) {
GetShard(s)->EraseUnRefEntries();
}
}
std::string ShardedCache::GetPrintableOptions() const {
std::string ret;
ret.reserve(20000);
const int kBufferSize = 200;
char buffer[kBufferSize];
{
MutexLock l(&capacity_mutex_);
snprintf(buffer, kBufferSize, " capacity : %" ROCKSDB_PRIszt "\n",
capacity_);
ret.append(buffer);
snprintf(buffer, kBufferSize, " num_shard_bits : %d\n",
GetNumShardBits());
ret.append(buffer);
snprintf(buffer, kBufferSize, " strict_capacity_limit : %d\n",
strict_capacity_limit_);
ret.append(buffer);
}
snprintf(buffer, kBufferSize, " memory_allocator : %s\n",
memory_allocator() ? memory_allocator()->Name() : "None");
ret.append(buffer);
ret.append(GetShard(0)->GetPrintableOptions());
return ret;
}
int GetDefaultCacheShardBits(size_t capacity) {
int num_shard_bits = 0;
size_t min_shard_size = 512L * 1024L; // Every shard is at least 512KB.
size_t num_shards = capacity / min_shard_size;
while (num_shards >>= 1) {
if (++num_shard_bits >= 6) {
// No more than 6.
return num_shard_bits;
}
}
return num_shard_bits;
}
int ShardedCache::GetNumShardBits() const { return BitsSetToOne(shard_mask_); }
uint32_t ShardedCache::GetNumShards() const { return shard_mask_ + 1; }
} // namespace ROCKSDB_NAMESPACE

132
cache/sharded_cache.h vendored
View File

@ -1,132 +0,0 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <atomic>
#include <string>
#include "port/port.h"
#include "rocksdb/cache.h"
namespace ROCKSDB_NAMESPACE {
// Single cache shard interface.
class CacheShard {
public:
CacheShard() = default;
virtual ~CacheShard() = default;
using DeleterFn = Cache::DeleterFn;
virtual Status Insert(const Slice& key, uint32_t hash, void* value,
size_t charge, DeleterFn deleter,
Cache::Handle** handle, Cache::Priority priority) = 0;
virtual Status Insert(const Slice& key, uint32_t hash, void* value,
const Cache::CacheItemHelper* helper, size_t charge,
Cache::Handle** handle, Cache::Priority priority) = 0;
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash) = 0;
virtual Cache::Handle* Lookup(const Slice& key, uint32_t hash,
const Cache::CacheItemHelper* helper,
const Cache::CreateCallback& create_cb,
Cache::Priority priority, bool wait,
Statistics* stats) = 0;
virtual bool Release(Cache::Handle* handle, bool useful,
bool erase_if_last_ref) = 0;
virtual bool IsReady(Cache::Handle* handle) = 0;
virtual void Wait(Cache::Handle* handle) = 0;
virtual bool Ref(Cache::Handle* handle) = 0;
virtual bool Release(Cache::Handle* handle, bool erase_if_last_ref) = 0;
virtual void Erase(const Slice& key, uint32_t hash) = 0;
virtual void SetCapacity(size_t capacity) = 0;
virtual void SetStrictCapacityLimit(bool strict_capacity_limit) = 0;
virtual size_t GetUsage() const = 0;
virtual size_t GetPinnedUsage() const = 0;
// Handles iterating over roughly `average_entries_per_lock` entries, using
// `state` to somehow record where it last ended up. Caller initially uses
// *state == 0 and implementation sets *state = UINT32_MAX to indicate
// completion.
virtual void ApplyToSomeEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
uint32_t average_entries_per_lock, uint32_t* state) = 0;
virtual void EraseUnRefEntries() = 0;
virtual std::string GetPrintableOptions() const { return ""; }
void set_metadata_charge_policy(
CacheMetadataChargePolicy metadata_charge_policy) {
metadata_charge_policy_ = metadata_charge_policy;
}
protected:
CacheMetadataChargePolicy metadata_charge_policy_ = kDontChargeCacheMetadata;
};
// Generic cache interface which shards cache by hash of keys. 2^num_shard_bits
// shards will be created, with capacity split evenly to each of the shards.
// Keys are sharded by the highest num_shard_bits bits of hash value.
class ShardedCache : public Cache {
public:
ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr);
virtual ~ShardedCache() = default;
virtual CacheShard* GetShard(uint32_t shard) = 0;
virtual const CacheShard* GetShard(uint32_t shard) const = 0;
virtual uint32_t GetHash(Handle* handle) const = 0;
virtual void SetCapacity(size_t capacity) override;
virtual void SetStrictCapacityLimit(bool strict_capacity_limit) override;
virtual Status Insert(const Slice& key, void* value, size_t charge,
DeleterFn deleter, Handle** handle,
Priority priority) override;
virtual Status Insert(const Slice& key, void* value,
const CacheItemHelper* helper, size_t chargge,
Handle** handle = nullptr,
Priority priority = Priority::LOW) override;
virtual Handle* Lookup(const Slice& key, Statistics* stats) override;
virtual Handle* Lookup(const Slice& key, const CacheItemHelper* helper,
const CreateCallback& create_cb, Priority priority,
bool wait, Statistics* stats = nullptr) override;
virtual bool Release(Handle* handle, bool useful,
bool erase_if_last_ref = false) override;
virtual bool IsReady(Handle* handle) override;
virtual void Wait(Handle* handle) override;
virtual bool Ref(Handle* handle) override;
virtual bool Release(Handle* handle, bool erase_if_last_ref = false) override;
virtual void Erase(const Slice& key) override;
virtual uint64_t NewId() override;
virtual size_t GetCapacity() const override;
virtual bool HasStrictCapacityLimit() const override;
virtual size_t GetUsage() const override;
virtual size_t GetUsage(Handle* handle) const override;
virtual size_t GetPinnedUsage() const override;
virtual void ApplyToAllEntries(
const std::function<void(const Slice& key, void* value, size_t charge,
DeleterFn deleter)>& callback,
const ApplyToAllEntriesOptions& opts) override;
virtual void EraseUnRefEntries() override;
virtual std::string GetPrintableOptions() const override;
int GetNumShardBits() const;
uint32_t GetNumShards() const;
protected:
inline uint32_t Shard(uint32_t hash) { return hash & shard_mask_; }
private:
const uint32_t shard_mask_;
mutable port::Mutex capacity_mutex_;
size_t capacity_;
bool strict_capacity_limit_;
std::atomic<uint64_t> last_id_;
};
extern int GetDefaultCacheShardBits(size_t capacity);
} // namespace ROCKSDB_NAMESPACE

View File

@ -1,54 +0,0 @@
@PACKAGE_INIT@
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/modules")
include(CMakeFindDependencyMacro)
set(GFLAGS_USE_TARGET_NAMESPACE @GFLAGS_USE_TARGET_NAMESPACE@)
if(@WITH_JEMALLOC@)
find_dependency(JeMalloc)
endif()
if(@WITH_GFLAGS@)
find_dependency(gflags CONFIG)
if(NOT gflags_FOUND)
find_dependency(gflags)
endif()
endif()
if(@WITH_SNAPPY@)
find_dependency(Snappy CONFIG)
if(NOT Snappy_FOUND)
find_dependency(Snappy)
endif()
endif()
if(@WITH_ZLIB@)
find_dependency(ZLIB)
endif()
if(@WITH_BZ2@)
find_dependency(BZip2)
endif()
if(@WITH_LZ4@)
find_dependency(lz4)
endif()
if(@WITH_ZSTD@)
find_dependency(zstd)
endif()
if(@WITH_NUMA@)
find_dependency(NUMA)
endif()
if(@WITH_TBB@)
find_dependency(TBB)
endif()
find_dependency(Threads)
include("${CMAKE_CURRENT_LIST_DIR}/RocksDBTargets.cmake")
check_required_components(RocksDB)

View File

@ -1,7 +0,0 @@
macro(get_cxx_std_flags FLAGS_VARIABLE)
if( CMAKE_CXX_STANDARD_REQUIRED )
set(${FLAGS_VARIABLE} ${CMAKE_CXX${CMAKE_CXX_STANDARD}_STANDARD_COMPILE_OPTION})
else()
set(${FLAGS_VARIABLE} ${CMAKE_CXX${CMAKE_CXX_STANDARD}_EXTENSION_COMPILE_OPTION})
endif()
endmacro()

View File

@ -1,29 +0,0 @@
# - Find JeMalloc library
# Find the native JeMalloc includes and library
#
# JeMalloc_INCLUDE_DIRS - where to find jemalloc.h, etc.
# JeMalloc_LIBRARIES - List of libraries when using jemalloc.
# JeMalloc_FOUND - True if jemalloc found.
find_path(JeMalloc_INCLUDE_DIRS
NAMES jemalloc/jemalloc.h
HINTS ${JEMALLOC_ROOT_DIR}/include)
find_library(JeMalloc_LIBRARIES
NAMES jemalloc
HINTS ${JEMALLOC_ROOT_DIR}/lib)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(JeMalloc DEFAULT_MSG JeMalloc_LIBRARIES JeMalloc_INCLUDE_DIRS)
mark_as_advanced(
JeMalloc_LIBRARIES
JeMalloc_INCLUDE_DIRS)
if(JeMalloc_FOUND AND NOT (TARGET JeMalloc::JeMalloc))
add_library (JeMalloc::JeMalloc UNKNOWN IMPORTED)
set_target_properties(JeMalloc::JeMalloc
PROPERTIES
IMPORTED_LOCATION ${JeMalloc_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${JeMalloc_INCLUDE_DIRS})
endif()

View File

@ -1,29 +0,0 @@
# - Find NUMA
# Find the NUMA library and includes
#
# NUMA_INCLUDE_DIRS - where to find numa.h, etc.
# NUMA_LIBRARIES - List of libraries when using NUMA.
# NUMA_FOUND - True if NUMA found.
find_path(NUMA_INCLUDE_DIRS
NAMES numa.h numaif.h
HINTS ${NUMA_ROOT_DIR}/include)
find_library(NUMA_LIBRARIES
NAMES numa
HINTS ${NUMA_ROOT_DIR}/lib)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NUMA DEFAULT_MSG NUMA_LIBRARIES NUMA_INCLUDE_DIRS)
mark_as_advanced(
NUMA_LIBRARIES
NUMA_INCLUDE_DIRS)
if(NUMA_FOUND AND NOT (TARGET NUMA::NUMA))
add_library (NUMA::NUMA UNKNOWN IMPORTED)
set_target_properties(NUMA::NUMA
PROPERTIES
IMPORTED_LOCATION ${NUMA_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${NUMA_INCLUDE_DIRS})
endif()

View File

@ -1,29 +0,0 @@
# - Find Snappy
# Find the snappy compression library and includes
#
# Snappy_INCLUDE_DIRS - where to find snappy.h, etc.
# Snappy_LIBRARIES - List of libraries when using snappy.
# Snappy_FOUND - True if snappy found.
find_path(Snappy_INCLUDE_DIRS
NAMES snappy.h
HINTS ${snappy_ROOT_DIR}/include)
find_library(Snappy_LIBRARIES
NAMES snappy
HINTS ${snappy_ROOT_DIR}/lib)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Snappy DEFAULT_MSG Snappy_LIBRARIES Snappy_INCLUDE_DIRS)
mark_as_advanced(
Snappy_LIBRARIES
Snappy_INCLUDE_DIRS)
if(Snappy_FOUND AND NOT (TARGET Snappy::snappy))
add_library (Snappy::snappy UNKNOWN IMPORTED)
set_target_properties(Snappy::snappy
PROPERTIES
IMPORTED_LOCATION ${Snappy_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${Snappy_INCLUDE_DIRS})
endif()

View File

@ -1,33 +0,0 @@
# - Find TBB
# Find the Thread Building Blocks library and includes
#
# TBB_INCLUDE_DIRS - where to find tbb.h, etc.
# TBB_LIBRARIES - List of libraries when using TBB.
# TBB_FOUND - True if TBB found.
if(NOT DEFINED TBB_ROOT_DIR)
set(TBB_ROOT_DIR "$ENV{TBBROOT}")
endif()
find_path(TBB_INCLUDE_DIRS
NAMES tbb/tbb.h
HINTS ${TBB_ROOT_DIR}/include)
find_library(TBB_LIBRARIES
NAMES tbb
HINTS ${TBB_ROOT_DIR}/lib ENV LIBRARY_PATH)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(TBB DEFAULT_MSG TBB_LIBRARIES TBB_INCLUDE_DIRS)
mark_as_advanced(
TBB_LIBRARIES
TBB_INCLUDE_DIRS)
if(TBB_FOUND AND NOT (TARGET TBB::TBB))
add_library (TBB::TBB UNKNOWN IMPORTED)
set_target_properties(TBB::TBB
PROPERTIES
IMPORTED_LOCATION ${TBB_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${TBB_INCLUDE_DIRS})
endif()

View File

@ -1,29 +0,0 @@
# - Find gflags library
# Find the gflags includes and library
#
# GFLAGS_INCLUDE_DIR - where to find gflags.h.
# GFLAGS_LIBRARIES - List of libraries when using gflags.
# gflags_FOUND - True if gflags found.
find_path(GFLAGS_INCLUDE_DIR
NAMES gflags/gflags.h)
find_library(GFLAGS_LIBRARIES
NAMES gflags)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(gflags
DEFAULT_MSG GFLAGS_LIBRARIES GFLAGS_INCLUDE_DIR)
mark_as_advanced(
GFLAGS_LIBRARIES
GFLAGS_INCLUDE_DIR)
if(gflags_FOUND AND NOT (TARGET gflags::gflags))
add_library(gflags::gflags UNKNOWN IMPORTED)
set_target_properties(gflags::gflags
PROPERTIES
IMPORTED_LOCATION ${GFLAGS_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${GFLAGS_INCLUDE_DIR}
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX")
endif()

View File

@ -1,29 +0,0 @@
# - Find Lz4
# Find the lz4 compression library and includes
#
# lz4_INCLUDE_DIRS - where to find lz4.h, etc.
# lz4_LIBRARIES - List of libraries when using lz4.
# lz4_FOUND - True if lz4 found.
find_path(lz4_INCLUDE_DIRS
NAMES lz4.h
HINTS ${lz4_ROOT_DIR}/include)
find_library(lz4_LIBRARIES
NAMES lz4
HINTS ${lz4_ROOT_DIR}/lib)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(lz4 DEFAULT_MSG lz4_LIBRARIES lz4_INCLUDE_DIRS)
mark_as_advanced(
lz4_LIBRARIES
lz4_INCLUDE_DIRS)
if(lz4_FOUND AND NOT (TARGET lz4::lz4))
add_library(lz4::lz4 UNKNOWN IMPORTED)
set_target_properties(lz4::lz4
PROPERTIES
IMPORTED_LOCATION ${lz4_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${lz4_INCLUDE_DIRS})
endif()

View File

@ -1,26 +0,0 @@
# - Find liburing
#
# uring_INCLUDE_DIR - Where to find liburing.h
# uring_LIBRARIES - List of libraries when using uring.
# uring_FOUND - True if uring found.
find_path(uring_INCLUDE_DIR
NAMES liburing.h)
find_library(uring_LIBRARIES
NAMES liburing.a liburing)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(uring
DEFAULT_MSG uring_LIBRARIES uring_INCLUDE_DIR)
mark_as_advanced(
uring_INCLUDE_DIR
uring_LIBRARIES)
if(uring_FOUND AND NOT TARGET uring::uring)
add_library(uring::uring UNKNOWN IMPORTED)
set_target_properties(uring::uring PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${uring_INCLUDE_DIR}"
IMPORTED_LINK_INTERFACE_LANGUAGES "C"
IMPORTED_LOCATION "${uring_LIBRARIES}")
endif()

View File

@ -1,29 +0,0 @@
# - Find zstd
# Find the zstd compression library and includes
#
# zstd_INCLUDE_DIRS - where to find zstd.h, etc.
# zstd_LIBRARIES - List of libraries when using zstd.
# zstd_FOUND - True if zstd found.
find_path(zstd_INCLUDE_DIRS
NAMES zstd.h
HINTS ${zstd_ROOT_DIR}/include)
find_library(zstd_LIBRARIES
NAMES zstd
HINTS ${zstd_ROOT_DIR}/lib)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(zstd DEFAULT_MSG zstd_LIBRARIES zstd_INCLUDE_DIRS)
mark_as_advanced(
zstd_LIBRARIES
zstd_INCLUDE_DIRS)
if(zstd_FOUND AND NOT (TARGET zstd::zstd))
add_library (zstd::zstd UNKNOWN IMPORTED)
set_target_properties(zstd::zstd
PROPERTIES
IMPORTED_LOCATION ${zstd_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${zstd_INCLUDE_DIRS})
endif()

View File

@ -1,10 +0,0 @@
# Read rocksdb version from version.h header file.
function(get_rocksdb_version version_var)
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/include/rocksdb/version.h" version_header_file)
foreach(component MAJOR MINOR PATCH)
string(REGEX MATCH "#define ROCKSDB_${component} ([0-9]+)" _ ${version_header_file})
set(ROCKSDB_VERSION_${component} ${CMAKE_MATCH_1})
endforeach()
set(${version_var} "${ROCKSDB_VERSION_MAJOR}.${ROCKSDB_VERSION_MINOR}.${ROCKSDB_VERSION_PATCH}" PARENT_SCOPE)
endfunction()

Some files were not shown because too many files have changed in this diff Show More