Revamp check_format_compatible.sh (#8012)

Summary:
* Adds backup/restore forward/backward compatibility testing
* Adds forward/backward compatibility testing to sst ingestion
* More structure sharing and comments for the lists of branches
comprising each group
* Less reliant on invariants between groups with de-duplication logic
* Restructured for n+1 branch checkout+build steps rather than something
like 3n. Should be much faster despite more checks.

And to make manual runs easier

* On success, restores working trees to original working branch (aborts
early if uncommitted changes) and deletes temporary branch & remote
* Adds SHORT_TEST=1 mode that uses only the oldest version for each
group
* Adds USE_SSH=1 to use ssh instead of https for github

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8012

Test Plan:
a number of manual tests, mostly with SHORT_TEST=1. Using one
version older for any of the groups (except I didn't check
db_backward_only_refs) fails. Changing default format_version to 5
(planned) without updating this script fails as it should, and passes
with appropriate update. Full local run passed (had to remove "2.7.fb.branch"
due to compiler issues, also before this change).

Reviewed By: riversand963

Differential Revision: D26735840

Pulled By: pdillinger

fbshipit-source-id: 1320c22de5674760657e385aa42df9fade8b6fff
This commit is contained in:
Peter Dillinger 2021-03-02 11:40:36 -08:00 committed by Facebook GitHub Bot
parent a46f080cce
commit a9046f3c45
4 changed files with 282 additions and 83 deletions

15
tools/backup_db.sh Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
#
# Back up a DB with the ldb tool.
#
# Arguments:
#   $1 - path of the DB to back up
#   $2 - directory that receives the backup
# Runs ./ldb relative to the current working directory.
# Exit status: 1 when fewer than two arguments are given; otherwise the
# status of the ldb command, since it is the last command executed.
if (( $# < 2 )); then
  printf '%s\n' "usage: ${BASH_SOURCE[0]} <DB Path> <Backup Dir>"
  exit 1
fi

source_db="$1"
target_backup="$2"

printf '%s\n' "== Backing up DB $source_db to $target_backup"
./ldb backup --db="$source_db" --backup_dir="$target_backup"

View File

@ -1,25 +1,83 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# #
# A shell script to load some pre generated data file to a DB using ldb tool # A shell script to build and run different versions of ldb to check for
# ./ldb needs to be available to be executed. # expected forward and backward compatibility with "current" version. The
# working copy must have no uncommitted changes.
#
# Usage: <SCRIPT> [ref_for_current]
# `ref_for_current` can be a revision, tag, commit or branch name. Default is HEAD.
# #
# Usage: <SCRIPT> [checkout]
# `checkout` can be a tag, commit or branch name. Will build using it and check DBs generated by all previous branches (or tags for very old versions without branch) can be opened by it.
# Return value 0 means all regression tests pass. 1 if not pass. # Return value 0 means all regression tests pass. 1 if not pass.
#
# Environment options:
# SHORT_TEST=1 - Test only the oldest branch for each kind of test. This is
# a good choice for PR validation as it is relatively fast and will find
# most issues.
# USE_SSH=1 - Connect to GitHub with ssh instead of https
scriptpath=`dirname $BASH_SOURCE` if ! git diff-index --quiet HEAD; then
test_dir=${TEST_TMPDIR:-"/tmp"}"/format_compatible_check" echo "You have uncommitted changes. Aborting."
exit 1
fi
# Ref to treat as the "current" version; first argument, default HEAD.
current_checkout_name=${1:-HEAD}
# Resolve the ref to a hash up front so the script keeps working with
# transient refs like "HEAD", which move as other versions are checked out.
# The ref is quoted so an argument containing whitespace is passed intact.
current_checkout_hash="$(git rev-parse --quiet --verify "$current_checkout_name")"
if [[ -z "$current_checkout_hash" ]]; then
  echo "Not a recognized ref: $current_checkout_name"
  exit 1
fi

# To restore to prior branch at the end
orig_branch="$(git rev-parse --abbrev-ref HEAD)"
if [[ "$orig_branch" == "HEAD" ]]; then
  # Detached HEAD: --abbrev-ref prints the literal "HEAD", and checking that
  # out later would leave the tree on the temporary branch (which then cannot
  # be deleted). Remember the commit hash so the original state is restorable.
  orig_branch="$(git rev-parse HEAD)"
fi
tmp_branch=_tmp_format_compatible
tmp_origin=_tmp_origin

# Don't depend on what current "origin" might be
set -e
# Drop any same-named remote first; it is fine if none exists, so the
# error output and failure status are deliberately ignored.
git remote remove "$tmp_origin" 2>/dev/null || true
if [[ -n "$USE_SSH" ]]; then
  git remote add "$tmp_origin" "git@github.com:facebook/rocksdb.git"
else
  git remote add "$tmp_origin" "https://github.com/facebook/rocksdb.git"
fi
git fetch "$tmp_origin"
# Undo the temporary git state this script creates: switch back to
# $orig_branch, then delete the temporary branch and temporary remote.
# Globals read: orig_branch, tmp_branch, tmp_origin
# Every step is best-effort: a failing git command is ignored so the
# remaining steps still run and the function itself reports success.
cleanup() {
  printf '%s\n' "== Cleaning up"
  git checkout "$orig_branch" || :
  git branch -D "$tmp_branch" || :
  git remote remove "$tmp_origin" || :
}
trap cleanup EXIT # Always clean up, even on failure or Ctrl+C
scriptpath=`dirname ${BASH_SOURCE[0]}`
test_dir=${TEST_TMPDIR:-"/tmp"}"/rocksdb_format_compatible_$USER"
rm -rf ${test_dir:?}
# For saving current version of scripts as we checkout different versions to test
script_copy_dir=$test_dir"/script_copy" script_copy_dir=$test_dir"/script_copy"
input_data_path=$test_dir"/test_data_input/" mkdir -p $script_copy_dir
cp -f $scriptpath/*.sh $script_copy_dir
# For shared raw input data
input_data_path=$test_dir"/test_data_input"
mkdir -p $input_data_path
# For external sst ingestion test
ext_test_dir=$test_dir"/ext"
mkdir -p $ext_test_dir
# For DB dump test
db_test_dir=$test_dir"/db"
mkdir -p $db_test_dir
# For backup/restore test (uses DB test)
bak_test_dir=$test_dir"/bak"
mkdir -p $bak_test_dir
python_bin=$(which python3 || which python || echo python3) python_bin=$(which python3 || which python || echo python3)
mkdir $test_dir || true
mkdir $input_data_path || true
rm -rf $script_copy_dir
cp $scriptpath $script_copy_dir -rf
# Generate random files. # Generate random files.
for i in {1..6} for i in {1..6}
do do
@ -55,11 +113,55 @@ with open('${sorted_input_data}', 'w') as f:
print(k + " ==> " + v, file=f) print(k + " ==> " + v, file=f)
EOF EOF
declare -a backward_compatible_checkout_objs=("2.2.fb.branch" "2.3.fb.branch" "2.4.fb.branch" "2.5.fb.branch" "2.6.fb.branch" "2.7.fb.branch" "2.8.1.fb" "3.0.fb.branch" "3.1.fb" "3.2.fb" "3.3.fb" "3.4.fb" "3.5.fb" "3.6.fb" "3.7.fb" "3.8.fb" "3.9.fb" "4.2.fb" "4.3.fb" "4.4.fb" "4.5.fb" "4.6.fb" "4.7.fb" "4.8.fb" "4.9.fb" "4.10.fb" "4.11.fb" "4.12.fb" "4.13.fb" "5.0.fb" "5.1.fb" "5.2.fb" "5.3.fb" "5.4.fb" "5.5.fb" "5.6.fb" "5.7.fb" "5.8.fb" "5.9.fb" "5.10.fb" "5.11.fb" "5.12.fb" "5.13.fb" "5.14.fb" "5.15.fb") # db_backward_only_refs defined below the rest
declare -a forward_compatible_checkout_objs=() # N/A at the moment
declare -a forward_compatible_with_options_checkout_objs=("5.16.fb" "5.17.fb" "5.18.fb" "6.0.fb" "6.1.fb" "6.2.fb" "6.3.fb" "6.4.fb" "6.5.fb" "6.6.fb" "6.7.fb" "6.8.fb" "6.9.fb" "6.10.fb" "6.11.fb" "6.12.fb" "6.13.fb" "6.14.fb" "6.15.fb" "6.16.fb" "6.17.fb") # To check for DB forward compatibility with loading options (old version
declare -a checkout_objs=(${backward_compatible_checkout_objs[@]} ${forward_compatible_checkout_objs[@]} ${forward_compatible_with_options_checkout_objs[@]}) # reading data from new), as well as backward compatibility
declare -a extern_sst_ingestion_compatible_checkout_objs=("5.16.fb" "5.17.fb" "5.18.fb" "6.0.fb" "6.1.fb" "6.2.fb" "6.3.fb" "6.4.fb" "6.5.fb" "6.6.fb" "6.7.fb" "6.8.fb" "6.9.fb" "6.10.fb" "6.11.fb" "6.12.fb" "6.13.fb" "6.14.fb" "6.15.fb" "6.16.fb" "6.17.fb") declare -a db_forward_with_options_refs=("5.16.fb" "5.17.fb" "5.18.fb" "6.0.fb" "6.1.fb" "6.2.fb" "6.3.fb" "6.4.fb" "6.5.fb" "6.6.fb" "6.7.fb" "6.8.fb" "6.9.fb" "6.10.fb" "6.11.fb" "6.12.fb" "6.13.fb" "6.14.fb" "6.15.fb" "6.16.fb" "6.17.fb")
# To check for DB forward compatibility without loading options (in addition
# to the "with loading options" set), as well as backward compatibility
declare -a db_forward_no_options_refs=() # N/A at the moment
# To check for SST ingestion backward compatibility (new version reading
# data from old) (ldb ingest_extern_sst added in 5.16.x, back-ported to
# 5.14.x, 5.15.x)
declare -a ext_backward_only_refs=("5.14.fb" "5.15.fb")
# To check for SST ingestion forward compatibility (old version reading
# data from new) as well as backward compatibility
declare -a ext_forward_refs=("${db_forward_no_options_refs[@]}" "${db_forward_with_options_refs[@]}")
# To check for backup backward compatibility (new version reading data
# from old) (ldb backup/restore added in 4.11.x)
declare -a bak_backward_only_refs=("4.11.fb" "4.12.fb" "4.13.fb" "5.0.fb" "5.1.fb" "5.2.fb" "5.3.fb" "5.4.fb" "5.5.fb" "5.6.fb" "5.7.fb" "5.8.fb" "5.9.fb" "5.10.fb" "5.11.fb" "5.12.fb" "5.13.fb" "${ext_backward_only_refs[@]}")
# To check for backup forward compatibility (old version reading data
# from new) as well as backward compatibility
declare -a bak_forward_refs=("${db_forward_no_options_refs[@]}" "${db_forward_with_options_refs[@]}")
# Branches (git refs) to check for DB backward compatibility (new version
# reading data from old) (in addition to the "forward compatible" list)
# NOTE: 2.7.fb.branch shows assertion violation in some configurations
declare -a db_backward_only_refs=("2.2.fb.branch" "2.3.fb.branch" "2.4.fb.branch" "2.5.fb.branch" "2.6.fb.branch" "2.7.fb.branch" "2.8.1.fb" "3.0.fb.branch" "3.1.fb" "3.2.fb" "3.3.fb" "3.4.fb" "3.5.fb" "3.6.fb" "3.7.fb" "3.8.fb" "3.9.fb" "4.2.fb" "4.3.fb" "4.4.fb" "4.5.fb" "4.6.fb" "4.7.fb" "4.8.fb" "4.9.fb" "4.10.fb" "${bak_backward_only_refs[@]}")
if [ "$SHORT_TEST" ]; then
# Use only the first (if exists) of each list
db_backward_only_refs=(${db_backward_only_refs[0]})
db_forward_no_options_refs=(${db_forward_no_options_refs[0]})
db_forward_with_options_refs=(${db_forward_with_options_refs[0]})
ext_backward_only_refs=(${ext_backward_only_refs[0]})
ext_forward_refs=(${ext_forward_refs[0]})
bak_backward_only_refs=(${bak_backward_only_refs[0]})
bak_forward_refs=(${bak_forward_refs[0]})
fi
# De-duplicate & accumulate
declare -a checkout_refs=()
for checkout_ref in "${db_backward_only_refs[@]}" "${db_forward_no_options_refs[@]}" "${db_forward_with_options_refs[@]}" "${ext_backward_only_refs[@]}" "${ext_forward_refs[@]}" "${bak_backward_only_refs[@]}" "${bak_forward_refs[@]}"
do
if [ ! -e $db_test_dir/$checkout_ref ]; then
mkdir -p $db_test_dir/$checkout_ref
checkout_refs+=($checkout_ref)
fi
done
generate_db() generate_db()
{ {
@ -105,89 +207,155 @@ ingest_external_sst()
set -e set -e
} }
# Sandcastle sets us up with a remote that is just another directory on the same backup_db()
# machine and doesn't have our branches. Need to fetch them so checkout works. {
# Remote add may fail if added previously (we don't cleanup). set +e
git remote add github_origin "https://github.com/facebook/rocksdb.git" $script_copy_dir/backup_db.sh $1 $2
set -e if [ $? -ne 0 ]; then
git fetch github_origin echo ==== Error backing up DB $1 to $2 ====
exit 1
fi
set -e
}
# Compatibility test for external SST file ingestion restore_db()
for checkout_obj in "${extern_sst_ingestion_compatible_checkout_objs[@]}" {
do set +e
echo == Generating DB with extern SST file in "$checkout_obj" ... $script_copy_dir/restore_db.sh $1 $2
git checkout github_origin/$checkout_obj -b $checkout_obj if [ $? -ne 0 ]; then
make clean echo ==== Error restoring from $1 to $2 ====
DISABLE_WARNING_AS_ERROR=1 make ldb -j32 exit 1
write_external_sst $input_data_path $test_dir/$checkout_obj $test_dir/$checkout_obj fi
ingest_external_sst $test_dir/$checkout_obj $test_dir/$checkout_obj set -e
done }
checkout_flag=${1:-"master"} member_of_array()
{
local e match="$1"
shift
for e; do [[ "$e" == "$match" ]] && return 0; done
return 1
}
echo == Building $checkout_flag debug # General structure from here:
git checkout github_origin/$checkout_flag -b tmp-$checkout_flag # * Check out, build, and do stuff with the "current" branch.
# * For each older branch under consideration,
# * Check out, build, and do stuff with it, potentially using data
# generated from "current" branch.
# * (Again) check out, build, and do (other) stuff with the "current"
# branch, potentially using data from older branches.
#
# This way, we only do at most n+1 checkout+build steps, without the
# need to stash away executables.
# Decorate name
current_checkout_name="$current_checkout_name ($current_checkout_hash)"
echo "== Building $current_checkout_name debug"
git checkout -B $tmp_branch $current_checkout_hash
make clean make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32 DISABLE_WARNING_AS_ERROR=1 make ldb -j32
compare_base_db_dir=$test_dir"/base_db_dir"
write_external_sst $input_data_path $compare_base_db_dir $compare_base_db_dir
ingest_external_sst $compare_base_db_dir $compare_base_db_dir
for checkout_obj in "${extern_sst_ingestion_compatible_checkout_objs[@]}" echo "== Using $current_checkout_name, generate DB with extern SST and ingest"
current_ext_test_dir=$ext_test_dir"/current"
write_external_sst $input_data_path ${current_ext_test_dir}_pointless $current_ext_test_dir
ingest_external_sst ${current_ext_test_dir}_ingest $current_ext_test_dir
echo "== Generating DB from $current_checkout_name ..."
current_db_test_dir=$db_test_dir"/current"
generate_db $input_data_path $current_db_test_dir
echo "== Creating backup of DB from $current_checkout_name ..."
current_bak_test_dir=$bak_test_dir"/current"
backup_db $current_db_test_dir $current_bak_test_dir
for checkout_ref in "${checkout_refs[@]}"
do do
echo == Build "$checkout_obj" and try to open DB generated using $checkout_flag echo "== Building $checkout_ref debug"
git checkout $checkout_obj git reset --hard $tmp_origin/$checkout_ref
make clean make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32 DISABLE_WARNING_AS_ERROR=1 make ldb -j32
compare_db $test_dir/$checkout_obj $compare_base_db_dir db_dump.txt 1 1
git checkout tmp-$checkout_flag # We currently assume DB backward compatibility for every branch listed
# Clean up echo "== Use $checkout_ref to generate a DB ..."
git branch -D $checkout_obj generate_db $input_data_path $db_test_dir/$checkout_ref
if member_of_array "$checkout_ref" "${ext_backward_only_refs[@]}" ||
member_of_array "$checkout_ref" "${ext_forward_refs[@]}"
then
echo "== Use $checkout_ref to generate DB with extern SST file"
write_external_sst $input_data_path $ext_test_dir/${checkout_ref}_pointless $ext_test_dir/$checkout_ref
fi
if member_of_array "$checkout_ref" "${ext_forward_refs[@]}"
then
echo "== Use $checkout_ref to ingest extern SST file and compare vs. $current_checkout_name"
ingest_external_sst $ext_test_dir/${checkout_ref}_ingest $ext_test_dir/$checkout_ref
compare_db $ext_test_dir/${checkout_ref}_ingest ${current_ext_test_dir}_ingest db_dump.txt 1 1
rm -rf ${ext_test_dir:?}/${checkout_ref}_ingest
echo "== Use $checkout_ref to ingest extern SST file from $current_checkout_name"
ingest_external_sst $ext_test_dir/${checkout_ref}_ingest $current_ext_test_dir
compare_db $ext_test_dir/${checkout_ref}_ingest ${current_ext_test_dir}_ingest db_dump.txt 1 1
fi
if member_of_array "$checkout_ref" "${db_forward_no_options_refs[@]}" ||
member_of_array "$checkout_ref" "${db_forward_with_options_refs[@]}"
then
echo "== Use $checkout_ref to open DB generated using $current_checkout_name..."
compare_db $db_test_dir/$checkout_ref $current_db_test_dir forward_${checkout_ref}_dump.txt 0
fi
if member_of_array "$checkout_ref" "${db_forward_with_options_refs[@]}"
then
echo "== Use $checkout_ref to open DB generated using $current_checkout_name with its options..."
compare_db $db_test_dir/$checkout_ref $current_db_test_dir forward_${checkout_ref}_dump.txt 1 1
fi
if member_of_array "$checkout_ref" "${bak_backward_only_refs[@]}" ||
member_of_array "$checkout_ref" "${bak_forward_refs[@]}"
then
echo "== Use $checkout_ref to backup DB"
backup_db $db_test_dir/$checkout_ref $bak_test_dir/$checkout_ref
fi
if member_of_array "$checkout_ref" "${bak_forward_refs[@]}"
then
echo "== Use $checkout_ref to restore DB from $current_checkout_name"
rm -rf ${db_test_dir:?}/$checkout_ref
restore_db $current_bak_test_dir $db_test_dir/$checkout_ref
compare_db $db_test_dir/$checkout_ref $current_db_test_dir forward_${checkout_ref}_dump.txt 0
fi
done done
echo == Finish compatibility test for SST ingestion. echo "== Building $current_checkout_name debug (again, final)"
git reset --hard $current_checkout_hash
for checkout_obj in "${checkout_objs[@]}"
do
echo == Generating DB from "$checkout_obj" ...
git checkout github_origin/$checkout_obj -b $checkout_obj
make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32
generate_db $input_data_path $test_dir/$checkout_obj
done
checkout_flag=${1:-"master"}
echo == Building $checkout_flag debug
git checkout tmp-$checkout_flag
make clean make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32 DISABLE_WARNING_AS_ERROR=1 make ldb -j32
compare_base_db_dir=$test_dir"/base_db_dir"
echo == Generate compare base DB to $compare_base_db_dir
generate_db $input_data_path $compare_base_db_dir
for checkout_obj in "${checkout_objs[@]}" for checkout_ref in "${checkout_refs[@]}"
do do
echo == Opening DB from "$checkout_obj" using debug build of $checkout_flag ... # We currently assume DB backward compatibility for every branch listed
compare_db $test_dir/$checkout_obj $compare_base_db_dir db_dump.txt 1 0 echo "== Use $current_checkout_name to open DB generated using $checkout_ref..."
done compare_db $db_test_dir/$checkout_ref $current_db_test_dir db_dump.txt 1 0
for checkout_obj in "${forward_compatible_checkout_objs[@]}" if member_of_array "$checkout_ref" "${ext_backward_only_refs[@]}" ||
do member_of_array "$checkout_ref" "${ext_forward_refs[@]}"
echo == Build "$checkout_obj" and try to open DB generated using $checkout_flag... then
git checkout $checkout_obj rm -rf ${ext_test_dir:?}/${checkout_ref}_ingest
make clean echo "== Use $current_checkout_name to ingest extern SST file from $checkout_ref"
DISABLE_WARNING_AS_ERROR=1 make ldb -j32 ingest_external_sst $ext_test_dir/${checkout_ref}_ingest $current_ext_test_dir
compare_db $test_dir/$checkout_obj $compare_base_db_dir forward_${checkout_obj}_dump.txt 0 compare_db $ext_test_dir/${checkout_ref}_ingest ${current_ext_test_dir}_ingest db_dump.txt 1 1
done fi
for checkout_obj in "${forward_compatible_with_options_checkout_objs[@]}" if member_of_array "$checkout_ref" "${bak_backward_only_refs[@]}" ||
do member_of_array "$checkout_ref" "${bak_forward_refs[@]}"
echo == Build "$checkout_obj" and try to open DB generated using $checkout_flag with its options... then
git checkout $checkout_obj echo "== Use $current_checkout_name to restore DB from $checkout_ref"
make clean rm -rf ${db_test_dir:?}/$checkout_ref
DISABLE_WARNING_AS_ERROR=1 make ldb -j32 restore_db $bak_test_dir/$checkout_ref $db_test_dir/$checkout_ref
compare_db $test_dir/$checkout_obj $compare_base_db_dir forward_${checkout_obj}_dump.txt 1 1 compare_db $db_test_dir/$checkout_ref $current_db_test_dir db_dump.txt 1 0
fi
done done
echo ==== Compatibility Test PASSED ==== echo ==== Compatibility Test PASSED ====

15
tools/restore_db.sh Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
#
# Restore a DB from a backup with the ldb tool.
#
# Arguments:
#   $1 - directory holding the backup
#   $2 - path where the DB is restored
# Runs ./ldb relative to the current working directory.
# Exit status: 1 when fewer than two arguments are given; otherwise the
# status of the ldb command, since it is the last command executed.
if (( $# < 2 )); then
  printf '%s\n' "usage: ${BASH_SOURCE[0]} <Backup Dir> <DB Path>"
  exit 1
fi

source_backup="$1"
target_db="$2"

printf '%s\n' "== Restoring latest from $source_backup to $target_db"
./ldb restore --db="$target_db" --backup_dir="$source_backup"

View File

@ -12,6 +12,7 @@ input_data_dir=$1
db_dir=$2 db_dir=$2
extern_sst_dir=$3 extern_sst_dir=$3
rm -rf $db_dir rm -rf $db_dir
mkdir -p $extern_sst_dir
set -e set -e