Add tests for globs

Antonio Andelic 2023-08-03 10:15:02 +00:00
parent 96bad0537a
commit d9f136ce0c
7 changed files with 390 additions and 93 deletions

View File

@@ -254,7 +254,8 @@ template <typename ArchiveInfo>
 LibArchiveReader<ArchiveInfo>::FileInfo LibArchiveReader<ArchiveInfo>::getFileInfo(const String & filename)
 {
     Handle handle(path_to_archive);
-    handle.locateFile(filename);
+    if (!handle.locateFile(filename))
+        throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: file not found", path_to_archive);
     return handle.getFileInfo();
 }
@@ -278,7 +279,9 @@ template <typename ArchiveInfo>
 std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader<ArchiveInfo>::readFile(NameFilter filter)
 {
     Handle handle(path_to_archive);
-    handle.locateFile(filter);
+    if (!handle.locateFile(filter))
+        throw Exception(
+            ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack archive {}: no file found satisfying the filter", path_to_archive);
     return std::make_unique<ReadBufferFromLibArchive>(std::move(handle), path_to_archive);
 }
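
With this change the return value of locateFile is no longer ignored: a missing entry inside an archive now fails fast with CANNOT_UNPACK_ARCHIVE. A minimal local reproduction of that path through clickhouse-local (the archive and file names here are illustrative, not the ones used by the tests below):

echo -e "1,2\n3,4" > data.csv
tar -cf archive.tar data.csv

# An existing entry is readable; a missing entry should now report CANNOT_UNPACK_ARCHIVE.
clickhouse-local --query "SELECT * FROM file('archive.tar :: data.csv')"
clickhouse-local --query "SELECT * FROM file('archive.tar :: nonexistent.csv')" 2>&1 | grep -o "CANNOT_UNPACK_ARCHIVE"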

View File

@@ -1403,6 +1403,9 @@ SinkToStoragePtr StorageFile::write(
     ContextPtr context,
     bool /*async_insert*/)
 {
+    if (!use_table_fd && !paths_to_archive.empty())
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Writing to archives is not supported");
+
     if (format_name == "Distributed")
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for Distributed format");

View File

@@ -3,50 +3,54 @@ Running for zip files
3 4
1 2
3 4
1
3
1
3
1
3
1
3
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
OK
Running for tar.gz files
1 2
3 4
1 2
3 4
1
3
1
3
1
3
1
3
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
OK
Running for tar files
1 2
3 4
1 2
3 4
1
3
1
3
1
3
1
3
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
OK
Running for 7z files
1 2
3 4
1 2
3 4
1
3
1
3
1
3
1
3
1 2
1 2
3 4
3 4
1 2
1 2
3 4
3 4
OK

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Tags: no-fasttest

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

source ${CUR_DIR}/02661_read_from_archive.lib

$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table"

user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')

echo -e "1,2\n3,4" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv

function run_archive_test() {
    echo "Running for $1 files"

    eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null"
    eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null"

    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..2}.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv"

    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1::nonexistent.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL"

    rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1
    rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.$1
}

run_archive_test "zip" "zip"
run_archive_test "tar.gz" "tar -cvzf"
run_archive_test "tar" "tar -cvf"
run_archive_test "7z" "7z a"

rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv
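
The read_archive_file helper comes from 02661_read_from_archive.lib, which is one of the changed files but is not shown in this view. A minimal sketch of what it presumably does, adapted from the inline helper in the deleted script at the end of this commit; the single-argument signature is inferred from the calls above, and the real library may run more query variants (the reference output also contains single-column selects):

# Assumed shape of the shared helper; relies on user_files_path being set by the caller.
function read_archive_file() {
    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/$1')"
    $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$1')"
    $CLICKHOUSE_CLIENT --query "SELECT * FROM 02661_archive_table"
    $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table"
}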

View File

@@ -0,0 +1,264 @@
Running for zip files
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
Running for tar.gz files
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
Running for tar files
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK
Running for 7z files
archive{1,2} data{1,3}.csv
1 2
1 2
3 4
3 4
9 10
11 12
1 2
1 2
3 4
3 4
9 10
11 12
archive3 data*.csv
5 6
7 8
9 10
11 12
5 6
7 8
9 10
11 12
archive* *.csv
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
1 2
1 2
3 4
3 4
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
archive* {2..3}.csv
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
5 6
5 6
7 8
7 8
9 10
9 10
11 12
11 12
OK

View File

@@ -0,0 +1,46 @@
#!/usr/bin/env bash
# Tags: no-fasttest, long

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

source ${CUR_DIR}/02661_read_from_archive.lib

$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table"

user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')

echo -e "1,2\n3,4" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv
echo -e "5,6\n7,8" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv
echo -e "9,10\n11,12" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv

function run_archive_test() {
    echo "Running for $1 files"

    eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive1.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv > /dev/null"
    eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive2.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv > /dev/null"
    eval "$2 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive3.$1 ${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.csv ${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.csv > /dev/null"

    echo "archive{1,2} data{1,3}.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive{1,2}.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{1,3}.csv"
    echo "archive3 data*.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive3.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.csv"
    echo "archive* *.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive*.$1 :: *.csv"
    echo "archive* {2..3}.csv"
    read_archive_file "${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive*.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{2..3}.csv"

    $CLICKHOUSE_LOCAL --query "SELECT * FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive3.$1::{2..3}.csv')" 2>&1 | grep -q "CANNOT_UNPACK_ARCHIVE" && echo "OK" || echo "FAIL"

    rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive1.$1
    rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive2.$1
    rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_with_globs_archive3.$1
}

run_archive_test "zip" "zip"
run_archive_test "tar.gz" "tar -cvzf"
run_archive_test "tar" "tar -cvf"
run_archive_test "7z" "7z a"

rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_data{1..3}.csv
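
The patterns echoed by the test combine archive-level globs and in-archive globs around the ' :: ' separator. For the first pattern, an illustrative standalone query (shortened, hypothetical file names):

# archive{1,2}.zip :: data{1,3}.csv reads data1.csv and data3.csv from archive1.zip and archive2.zip;
# per the reference output, entries missing from a matched archive are skipped, while a glob that
# matches nothing inside an archive (the {2..3}.csv check above) raises CANNOT_UNPACK_ARCHIVE.
clickhouse-local --query "SELECT * FROM file('archive{1,2}.zip :: data{1,3}.csv') ORDER BY c1"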

View File

@@ -1,59 +0,0 @@
#!/usr/bin/env bash
# Tags: no-fasttest

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table"

user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')

echo -e "1,2\n3,4" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv

function read_archive_file() {
    $CLICKHOUSE_LOCAL --query "SELECT $1 FROM file('${user_files_path}/$2')"
    $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$2')"
    $CLICKHOUSE_CLIENT --query "SELECT $1 FROM 02661_archive_table"
    $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table"
}

function run_archive_test() {
    echo "Running for $1 files"

    read_archive_file "*" "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv"
    read_archive_file "c1" "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..2}.$1 :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv"
}

zip ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
zip ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null

run_archive_test "zip"

rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.zip

tar -cvzf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.tar.gz ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv &> /dev/null
tar -cvzf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.tar.gz ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv &> /dev/null

run_archive_test "tar.gz"

rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.tar.gz
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.tar.gz

tar -cvf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.tar ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv &> /dev/null
tar -cvf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.tar ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv &> /dev/null

run_archive_test "tar"

rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.tar
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.tar

7z a ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.7z ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
7z a ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.7z ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null

run_archive_test "7z"

rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.7z
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.7z

rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv