Summary: See https://github.com/facebook/rocksdb/issues/14240 which brought this to my attention. Here I've added range deletions and compactions to the format compatible test, and fixed or worked around compatibility issues (likely longstanding). The first fix was in Version::MaybeInitializeFileMetaData for an assertion failure simply from adding range deletions from some 5.x version. The second fix is a broader work-around for older SST files with unreliable num_entries/num_range_deletions/num_deletions statistics in their table properties. We depend on them only for some paranoid checks for compaction, so in my assessment the best way to deal with those files is to exclude the paranoid checks when dealing with the files with unreliable data. (Details in code comments.) The important part is that compacting old files is exceptionally rare, so we aren't really interfering with the paranoid checks doing their job on an ongoing basis. This depends on https://github.com/facebook/rocksdb/issues/14315 (just landed) because there is a remaining undiagnosed problem with some very early releases, but I'm not fixing that because its support is being dropped. Pull Request resolved: https://github.com/facebook/rocksdb/pull/14323 Test Plan: test extended (ran locally excluding some releases) Reviewed By: xingbowang Differential Revision: D93032653 Pulled By: pdillinger fbshipit-source-id: f90b32f30ba4764692e68d23705f42c778e0dc1d
103 lines
3.3 KiB
Bash
Executable file
103 lines
3.3 KiB
Bash
Executable file
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# A shell script to load some pre-generated data files into a DB using the
# ldb tool. ./ldb needs to be available to be executed.
#
# Usage: <SCRIPT> <input_data_path> <DB Path>

# Validate the command line: both the input data directory and the DB path
# are required. Any additional arguments are ignored.
if (( $# < 2 )); then
  echo "usage: $BASH_SOURCE <input_data_path> <DB Path>" >&2
  exit 1
fi

input_data_dir=$1
db_dir=$2

# Start from a clean slate by removing whatever is at the DB path.
# Quoting keeps a path containing spaces or glob characters as one operand,
# `--` stops option parsing for paths starting with '-', and `:?` aborts the
# script rather than passing an empty operand to rm.
rm -rf -- "${db_dir:?DB path must not be empty}"

# Probe the ./ldb binary for compression-related capabilities.
#
# Globals read:    db_dir
# Globals written: second_gen_compression_support - set to 1 when
#                    `./ldb --version` succeeds, empty otherwise
#                  mixed_compression_support - set to 1 when a trial
#                    `./ldb load --compression_type=mixed` succeeds, empty
#                    otherwise
# Side effects:    the trial load may create $db_dir; it is removed again
#                  whenever the trial was attempted.
detect_compression_support() {
  second_gen_compression_support=
  mixed_compression_support=

  # Support for `ldb --version` is a crude under-approximation for versions
  # supporting dictionary compression and algorithms including zstd and lz4
  if ./ldb --version >/dev/null 2>&1; then
    second_gen_compression_support=1

    # Try an empty load with "mixed" compression to see whether it is accepted.
    if ./ldb load --db="$db_dir" --compression_type=mixed --create_if_missing \
        </dev/null >/dev/null 2>&1; then
      mixed_compression_support=1
    fi

    # Discard whatever the trial load left behind. Quoted so that a path with
    # spaces is removed as a whole; `:?` guards against an empty operand.
    rm -rf -- "${db_dir:?DB path must not be empty}"
  fi
}

detect_compression_support

# Check if the deleterange command is supported by grepping `ldb --help`.
#
# Globals written: deleterange_support - set to 1 when the combined
#                  stdout/stderr of `./ldb --help` mentions "deleterange",
#                  empty otherwise.
detect_deleterange_support() {
  deleterange_support=
  if ./ldb --help 2>&1 | grep -q deleterange; then
    deleterange_support=1
  fi
}

detect_deleterange_support

echo "== Loading data from $input_data_dir to $db_dir"

# Choose the list of compression types to rotate through.
#
# Globals read:    second_gen_compression_support, mixed_compression_support
# Globals written: compression_opts (indexed array)
#
# The baseline list is used unless second_gen_compression_support is 1, in
# which case zstd/lz4/lz4hc are included; "mixed" is appended only when
# mixed_compression_support is 1 as well.
select_compression_opts() {
  # Plain assignment (not `declare`) so the array stays global.
  compression_opts=("no" "snappy" "zlib" "bzip2")
  if [[ "$second_gen_compression_support" == 1 ]]; then
    compression_opts=("zstd" "no" "snappy" "zlib" "bzip2" "lz4" "lz4hc")
    if [[ "$mixed_compression_support" == 1 ]]; then
      compression_opts+=("mixed")
    fi
  fi
}

select_compression_opts
# NOTE(review): allow_dict is not referenced anywhere else in the script as
# shown; kept in case something outside this view relies on it.
allow_dict=0

# From here on, abort on the first failing command.
set -e

# Random starting point for the rotation through compression settings.
n=$RANDOM
c_count=${#compression_opts[@]}

# Deterministically delete one key (or a tiny key range) taken from a data
# file, for approximately 1/4 of files. The md5sum of the file decides both
# whether a deletion happens and which line's key is used, so repeated runs
# over the same data make the same choice.
#
# Arguments:    $1 - path of the data file, $2 - its name for log output
# Globals read: db_dir, deleterange_support
delete_sample_key() {
  local file_path=$1 f=$2
  local hash hash_int line_count line_num key end_key

  # First 8 hex digits of the md5sum, interpreted as a base-16 integer.
  hash=$(md5sum "$file_path" | cut -c1-8)
  hash_int=$((16#$hash))
  (( hash_int % 4 == 0 )) || return 0

  # Pick a key from this file based on the hash
  line_count=$(wc -l < "$file_path")
  (( line_count > 0 )) || return 0
  line_num=$((hash_int % line_count + 1))
  # The key is the first space-delimited field of the chosen line.
  key=$(sed -n "${line_num}p" "$file_path" | cut -d' ' -f1)
  [[ -n "$key" ]] || return 0

  if [[ "$deleterange_support" == "1" ]]; then
    # Create end key by appending a character to make a small range
    end_key="${key}0"
    echo "== Deleting range [$key, $end_key) from $f"
    ./ldb deleterange --db="$db_dir" "$key" "$end_key"
  else
    # Fall back to point delete for equivalent logical contents
    echo "== Deleting key $key from $f"
    ./ldb delete --db="$db_dir" "$key"
  fi
}

# Load every entry of $input_data_dir into the DB at $db_dir with
# `./ldb load`, rotating through compression settings per file, then maybe
# delete a sample key from it.
#
# Globals read:    input_data_dir, db_dir, compression_opts, c_count,
#                  second_gen_compression_support, deleterange_support
# Globals written: n (incremented once per processed file)
# Returns:         non-zero when $input_data_dir is not a directory.
load_input_files() {
  local file_path f c d
  local -a dict_args

  if [[ ! -d "$input_data_dir" ]]; then
    echo "error: input data directory '$input_data_dir' not found" >&2
    return 1
  fi

  # Glob instead of parsing `ls` so names containing whitespace or glob
  # characters are handled as single entries.
  for file_path in "$input_data_dir"/*; do
    # An empty directory leaves the pattern unexpanded; skip it.
    [[ -e "$file_path" ]] || continue
    f=${file_path##*/}

    # NOTE: This will typically accumulate the loaded data into a .log file
    # which will only be flushed to an SST file on recovery in the next
    # iteration, so compression settings of this iteration might only apply
    # to data from the previous iteration (if there was one). This has the
    # advantage of leaving a WAL file for testing its format compatibility
    # (in addition to SST files etc.)
    c=${compression_opts[n % c_count]}
    # Alternates between 0 and 12345 each time the rotation wraps around.
    d=$((n / c_count % 2 * 12345))
    echo "== Loading $f with compression $c dict bytes $d"

    # Only pass the dictionary option when second_gen_compression_support
    # is 1; an array keeps "no argument" distinct from an empty argument.
    dict_args=()
    if [[ "$second_gen_compression_support" == 1 ]]; then
      dict_args=("--compression_max_dict_bytes=$d")
    fi

    ./ldb load --db="$db_dir" --compression_type="$c" "${dict_args[@]}" \
      --bloom_bits=10 --auto_compaction=false --create_if_missing \
      < "$file_path"

    delete_sample_key "$file_path" "$f"

    n=$((n + 1))
  done
}

load_input_files