ClickHouse/tests/queries/0_stateless/02732_rename_after_processing.sh

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

130 lines
4.2 KiB
Bash
Raw Normal View History

2023-05-07 14:59:40 +00:00
#!/usr/bin/env bash
# Test for the client option --rename-files-after-processing.

# Absolute path of the directory that contains this script.
CURDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=../shell_config.sh
source "${CURDIR}/../shell_config.sh"

# Discover the user_files directory (see 01658_read_file_to_stringcolumn.sh):
# query a file that does not exist, take the 9th whitespace-separated field of
# the line mentioning "Exception" and strip the "/nonexist.txt" suffix from it.
# NOTE(review): this relies on the exact wording of the error message, and it
# calls bare clickhouse-client instead of ${CLICKHOUSE_CLIENT} - confirm both.
CLICKHOUSE_USER_FILES_PATH="$(
  clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 \
    | awk '/Exception/ {gsub("/nonexist.txt","",$9); print $9}'
)"
# Prepare data
# Abort when either path component is empty: tmp_dir would otherwise collapse
# to "/" or "/<name>" and the cleanup glob below would run against the
# filesystem root. A ${tmp_dir:?} check alone cannot catch that, because the
# joined string always contains at least the "/".
unique_name=${CLICKHOUSE_TEST_UNIQUE_NAME:?CLICKHOUSE_TEST_UNIQUE_NAME is not set}
tmp_dir=${CLICKHOUSE_USER_FILES_PATH:?could not detect the user_files path}/${unique_name}

mkdir -p -- "${tmp_dir}"
# Start from an empty directory in case an earlier run left files behind.
rm -rf -- "${tmp_dir:?}"/*
# presumably opened up so the server process may rename files here - confirm
chmod 777 "${tmp_dir}"

# Template CSV: one header line followed by four data rows.
{
  echo '"id","str","int","text"'
  echo '1,"abc",123,"abacaba"'
  echo '2,"def",456,"bacabaa"'
  echo '3,"story",78912,"acabaab"'
  echo '4,"history",21321321,"cabaaba"'
} > "${tmp_dir}/tmp.csv"
chmod 777 "${tmp_dir}/tmp.csv"

# Every scenario below works on its own copy of the template.
for copy_name in tmp1 tmp2 tmp3_1 tmp3_2 tmp4 tmp5 tmp6; do
  cp -- "${tmp_dir}/tmp.csv" "${tmp_dir}/${copy_name}.csv"
done
2023-05-07 14:59:40 +00:00
### Checking that renaming works
# simple select
# ${CLICKHOUSE_CLIENT} stays unquoted on purpose: it presumably expands to the
# client binary plus options and has to be word-split (confirm in shell_config.sh).
${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f%e" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp1.csv')"
# The file is expected to show up under its new name ...
if [[ -e "${tmp_dir}/processed_tmp1.csv" ]]; then
  echo "processed_tmp1.csv"
fi
# ... and to be gone under the old one.
if [[ ! -e "${tmp_dir}/tmp1.csv" ]]; then
  echo "!tmp1.csv"
fi
# select with multiple file() calls
# The same file is read by both sides of a self-join; afterwards it is expected
# to exist only under its renamed form.
${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f%e" --multiline -q "
SELECT
sum(a.id) as aid,
sum(b.id) as bid
FROM file('${unique_name}/tmp2.csv') AS a
INNER JOIN file('${unique_name}/tmp2.csv') AS b
ON a.text = b.text
"
if [[ -e "${tmp_dir}/processed_tmp2.csv" ]]; then
  echo "processed_tmp2.csv"
fi
if [[ ! -e "${tmp_dir}/tmp2.csv" ]]; then
  echo "!tmp2.csv"
fi
# rename multiple files
# The glob tmp3*.csv selects both tmp3_1.csv and tmp3_2.csv in a single query.
${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f%e" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp3*.csv')"
# Report every renamed file that is present ...
for renamed_name in processed_tmp3_1.csv processed_tmp3_2.csv; do
  if [[ -e "${tmp_dir}/${renamed_name}" ]]; then
    echo "${renamed_name}"
  fi
done
# ... and then every original name that has disappeared.
for original_name in tmp3_1.csv tmp3_2.csv; do
  if [[ ! -e "${tmp_dir}/${original_name}" ]]; then
    echo "!${original_name}"
  fi
done
# check timestamp placeholder
${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f_%t.csv" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp4.csv')"

#######################################
# Tell whether a directory holds a file named processed_tmp4_<digits>.csv.
# Arguments: $1 - directory to inspect
# Returns:   0 if such a file exists, 1 otherwise
#######################################
has_timestamped_tmp4() {
  local dir=$1
  local candidate
  # Anchored on both sides so that only the complete file name can match
  # (e.g. "processed_tmp4_1.csv.bak" is rejected).
  local -r pattern='/processed_tmp4_[0-9]+\.csv$'
  # A glob is used instead of parsing the output of ls; when nothing matches,
  # the literal pattern is iterated once and simply fails the regex.
  for candidate in "${dir}"/processed*; do
    if [[ ${candidate} =~ ${pattern} ]]; then
      return 0
    fi
  done
  return 1
}

# The exact value substituted for %t is not known in advance, so only the
# shape of the resulting file name is checked.
if has_timestamped_tmp4 "${tmp_dir}"; then
  echo "OK"
fi
2023-05-07 14:59:40 +00:00
### Checking errors
# cannot overwrite an existing file
# The rename target "tmp.csv" is the template created during data preparation,
# so the query is expected to fail with an "already exists" error.
# Only grep's exit status is used; its output is discarded.
${CLICKHOUSE_CLIENT} --rename-files-after-processing="tmp.csv" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp5.csv')" \
    2>&1 | grep "already exists" > /dev/null && echo "OK"
# The source file is expected to stay in place after the failed rename.
if [[ -e "${tmp_dir}/tmp5.csv" ]]; then
  echo "tmp5.csv"
fi
2023-05-07 14:59:40 +00:00
2023-05-16 18:48:39 +00:00
# cannot move file outside user_files
# The pattern "../../%f%e" points above the test directory; the query is
# expected to fail with an "is not inside" error.
# Only grep's exit status is used; its output is discarded.
${CLICKHOUSE_CLIENT} --rename-files-after-processing="../../%f%e" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp5.csv')" \
    2>&1 | grep "is not inside" > /dev/null && echo "OK"
# The source file is expected to stay in place after the failed rename.
if [[ -e "${tmp_dir}/tmp5.csv" ]]; then
  echo "tmp5.csv"
fi
# check invalid placeholders
# unknown type of placeholder (%k)
# Print "OK" when the client output mentions the list of allowed placeholders;
# grep's own output is thrown away, only its exit status matters.
if ${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f_%k" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp5.csv')" 2>&1 \
    | grep "Allowed placeholders only" > /dev/null; then
  echo "OK"
fi
# Report the source file if it is still present under its original name.
if test -e "${tmp_dir}/tmp5.csv"; then
  echo "tmp5.csv"
fi
# Odd number of consecutive percentage signs after replacing valid placeholders
# Only grep's exit status is used; its output is discarded.
${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f_%%%%e" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp5.csv')" \
    2>&1 | grep "Odd number of consecutive percentage signs" > /dev/null && echo "OK"
# The source file is expected to stay in place after the failed rename.
if [[ -e "${tmp_dir}/tmp5.csv" ]]; then
  echo "tmp5.csv"
fi
2023-05-07 14:59:40 +00:00
# check full file name placeholder
# With the pattern "%a.processed" the file tmp6.csv is expected to end up as
# tmp6.csv.processed, i.e. %a stands for the complete original file name.
${CLICKHOUSE_CLIENT} --rename-files-after-processing="%a.processed" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp6.csv')"
renamed_path="${tmp_dir}/tmp6.csv.processed"
source_path="${tmp_dir}/tmp6.csv"
if [[ -e "${renamed_path}" ]]; then
  echo "tmp6.csv.processed"
fi
if [[ ! -e "${source_path}" ]]; then
  echo "!tmp6.csv"
fi
2023-05-07 14:59:40 +00:00
# Clean
# Remove the per-test directory with everything in it. The path is quoted so
# it cannot be word-split or globbed, "--" stops option parsing, and ":?"
# aborts instead of running rm with an empty argument.
rm -r -- "${tmp_dir:?}"