2023-05-07 14:59:40 +00:00
|
|
|
|
#!/usr/bin/env bash
|
|
|
|
|
|
|
|
|
|
# Resolve the directory that contains this script, then load shell_config.sh
# from its parent directory.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
source "${CURDIR}/../shell_config.sh"
|
|
|
|
|
|
|
|
|
|
# Same trick as 01658_read_file_to_stringcolumn.sh: query a file that does not
# exist, keep the "Exception" line of the combined stdout/stderr output, and
# print its 9th whitespace-separated field with "/nonexist.txt" stripped.
# Presumably that field is the absolute path the server tried to open, which
# leaves the user files directory.
# NOTE(review): this depends on the exact wording of the error message, and it
# calls clickhouse-client directly while the rest of the script goes through
# ${CLICKHOUSE_CLIENT} -- confirm both are intended.
CLICKHOUSE_USER_FILES_PATH=$(
  clickhouse-client --query "select _path, _file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 \
    | grep Exception \
    | awk '{gsub("/nonexist.txt","",$9); print $9}'
)
|
|
|
|
|
|
|
|
|
|
# Prepare data
#
# All fixtures live in a per-test directory below the detected user files
# path; the queries further down address them as '${unique_name}/<file>'.
unique_name=${CLICKHOUSE_TEST_UNIQUE_NAME}

# Fail fast if either path component is empty. Otherwise tmp_dir would
# collapse towards "/" and the cleanup glob below would run against the
# filesystem root (the ":?" on tmp_dir alone cannot catch a value of "/").
tmp_dir="${CLICKHOUSE_USER_FILES_PATH:?user files path could not be determined}/${unique_name:?CLICKHOUSE_TEST_UNIQUE_NAME is not set}"

mkdir -p "${tmp_dir}"
# Start from an empty directory in case an earlier run left files behind.
rm -rf "${tmp_dir:?}"/*
chmod 777 "${tmp_dir}"

# Source CSV: one header row followed by four data rows.
{
  echo '"id","str","int","text"'
  echo '1,"abc",123,"abacaba"'
  echo '2,"def",456,"bacabaa"'
  echo '3,"story",78912,"acabaab"'
  echo '4,"history",21321321,"cabaaba"'
} > "${tmp_dir}/tmp.csv"
chmod 777 "${tmp_dir}/tmp.csv"

# Each check below works on its own copy of tmp.csv (tmp3 has two files
# because it is matched with a glob).
for fixture in tmp1 tmp2 tmp3_1 tmp3_2 tmp4 tmp5 tmp6; do
  cp "${tmp_dir}/tmp.csv" "${tmp_dir}/${fixture}.csv"
done
|
2023-05-07 14:59:40 +00:00
|
|
|
|
|
|
|
|
|
### Checking that renaming works
|
|
|
|
|
|
|
|
|
|
# simple select
# Run one query over tmp1.csv with a "processed_" rename rule, then print a
# marker for each condition that holds: the renamed file exists, and the
# original name is gone.
# ${CLICKHOUSE_CLIENT} is left unquoted exactly as in the rest of the script.
${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f%e" \
  -q "SELECT COUNT(*) FROM file('${unique_name}/tmp1.csv')"
[[ -e "${tmp_dir}/processed_tmp1.csv" ]] && echo "processed_tmp1.csv"
[[ ! -e "${tmp_dir}/tmp1.csv" ]] && echo "!tmp1.csv"
|
|
|
|
|
|
|
|
|
|
# select with multiple file() calls
# The same file (tmp2.csv) is referenced by two file() table functions that
# are joined on "text". Afterwards print a marker for each condition that
# holds: the renamed file exists, and the original name is gone.
${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f%e" --multiline -q "
SELECT
sum(a.id) as aid,
sum(b.id) as bid
FROM file('${unique_name}/tmp2.csv') AS a
INNER JOIN file('${unique_name}/tmp2.csv') AS b
ON a.text = b.text
"
[[ -e "${tmp_dir}/processed_tmp2.csv" ]] && echo "processed_tmp2.csv"
[[ ! -e "${tmp_dir}/tmp2.csv" ]] && echo "!tmp2.csv"
|
|
|
|
|
|
|
|
|
|
# rename multiple files
# The glob tmp3*.csv covers both tmp3_1.csv and tmp3_2.csv in a single query.
${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f%e" \
  -q "SELECT COUNT(*) FROM file('${unique_name}/tmp3*.csv')"

# First report the renamed files that exist ...
for stem in tmp3_1 tmp3_2; do
  [[ -e "${tmp_dir}/processed_${stem}.csv" ]] && echo "processed_${stem}.csv"
done
# ... then the original names that are gone (same output order as before).
for stem in tmp3_1 tmp3_2; do
  [[ ! -e "${tmp_dir}/${stem}.csv" ]] && echo "!${stem}.csv"
done
|
|
|
|
|
|
|
|
|
|
# check timestamp placeholder
# The rename rule contains %t, so the resulting file name is not known in
# advance (presumably a numeric timestamp -- the pattern below expects digits).
${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f_%t.csv" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp4.csv')"

# Glob + regex stand in for:
#   ls ${tmp_dir} | grep -E "^processed_tmp4_[0-9]+\.csv$" > /dev/null && echo "OK"
# $x is a full path, so the pattern is anchored to the last path component:
# a leading "/" plays the role of "^" and "$" rejects trailing garbage such as
# "processed_tmp4_1.csv.bak".
rg='/processed_tmp4_[0-9]+\.csv$'
for x in "${tmp_dir}"/processed*; do
  if [[ $x =~ $rg ]]; then
    echo "OK"
    break
  fi
done
|
2023-05-07 14:59:40 +00:00
|
|
|
|
|
|
|
|
|
### Checking errors
|
|
|
|
|
|
|
|
|
|
# cannot overwrite an existing file
# The rename target "tmp.csv" was created during data preparation. Print "OK"
# when the client output mentions "already exists", then print a marker if
# tmp5.csv is still present under its original name.
if ${CLICKHOUSE_CLIENT} --rename-files-after-processing="tmp.csv" \
    -q "SELECT COUNT(*) FROM file('${unique_name}/tmp5.csv')" 2>&1 \
    | grep "already exists" > /dev/null; then
  echo "OK"
fi
[[ -e "${tmp_dir}/tmp5.csv" ]] && echo "tmp5.csv"
|
2023-05-07 14:59:40 +00:00
|
|
|
|
|
2023-05-16 18:48:39 +00:00
|
|
|
|
# cannot move file outside user_files
|
|
|
|
|
# The rule "../../%f%e" points above the test directory. Print "OK" when the
# client output mentions "is not inside", then print a marker if tmp5.csv is
# still present under its original name.
if ${CLICKHOUSE_CLIENT} --rename-files-after-processing="../../%f%e" \
    -q "SELECT COUNT(*) FROM file('${unique_name}/tmp5.csv')" 2>&1 \
    | grep "is not inside" > /dev/null; then
  echo "OK"
fi
[[ -e "${tmp_dir}/tmp5.csv" ]] && echo "tmp5.csv"
|
|
|
|
|
|
|
|
|
|
# check invalid placeholders
|
|
|
|
|
|
|
|
|
|
# unknown type of placeholder (%k)
# Print "OK" when the client output mentions "Allowed placeholders only", then
# print a marker if tmp5.csv is still present under its original name.
if ${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f_%k" \
    -q "SELECT COUNT(*) FROM file('${unique_name}/tmp5.csv')" 2>&1 \
    | grep "Allowed placeholders only" > /dev/null; then
  echo "OK"
fi
[[ -e "${tmp_dir}/tmp5.csv" ]] && echo "tmp5.csv"
|
|
|
|
|
|
|
|
|
|
# odd number of consecutive percentage signs after replacing valid placeholders
|
|
|
|
|
# The rule ends in "%%%%e": four percent signs followed by "e". Print "OK"
# when the client output mentions "Odd number of consecutive percentage
# signs", then print a marker if tmp5.csv is still present under its original
# name.
if ${CLICKHOUSE_CLIENT} --rename-files-after-processing="processed_%f_%%%%e" \
    -q "SELECT COUNT(*) FROM file('${unique_name}/tmp5.csv')" 2>&1 \
    | grep "Odd number of consecutive percentage signs" > /dev/null; then
  echo "OK"
fi
[[ -e "${tmp_dir}/tmp5.csv" ]] && echo "tmp5.csv"
|
2023-05-07 14:59:40 +00:00
|
|
|
|
|
2023-06-29 13:35:27 +00:00
|
|
|
|
# check full file name placeholder
# With the rule "%a.processed" the check looks for "tmp6.csv.processed", i.e.
# %a stands for the complete file name including its extension. Print a
# marker for each condition that holds: the renamed file exists, and the
# original name is gone.
${CLICKHOUSE_CLIENT} --rename-files-after-processing="%a.processed" \
  -q "SELECT COUNT(*) FROM file('${unique_name}/tmp6.csv')"
[[ -e "${tmp_dir}/tmp6.csv.processed" ]] && echo "tmp6.csv.processed"
[[ ! -e "${tmp_dir}/tmp6.csv" ]] && echo "!tmp6.csv"
|
|
|
|
|
|
2023-05-07 14:59:40 +00:00
|
|
|
|
# Clean
# Remove the per-test directory and everything in it. The expansion is quoted
# so a path with whitespace or glob characters is passed as a single operand,
# "--" ends option parsing, and ":?" aborts instead of handing rm an empty
# path.
rm -rd -- "${tmp_dir:?}"
|