Extend tests

This commit is contained in:
Antonio Andelic 2023-07-31 13:27:06 +00:00
parent 9fb86f134b
commit 6a1cf5c683
7 changed files with 97 additions and 12 deletions

View File

@ -146,4 +146,6 @@ target_compile_definitions(_libarchive PUBLIC
target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier")
target_link_libraries(_libarchive PRIVATE ch_contrib::zlib ch_contrib::xz)
add_library(ch_contrib::libarchive ALIAS _libarchive)

View File

@ -843,7 +843,7 @@ typedef uint64_t uintmax_t;
/* #undef HAVE_LZMADEC_H */
/* Define to 1 if you have the <lzma.h> header file. */
/* #undef HAVE_LZMA_H */
#define HAVE_LZMA_H 1
/* Define to 1 if you have a working `lzma_stream_encoder_mt' function. */
/* #undef HAVE_LZMA_STREAM_ENCODER_MT */
@ -909,7 +909,7 @@ typedef uint64_t uintmax_t;
/* #undef HAVE_NETTLE_SHA_H */
/* Define to 1 if you have the `nl_langinfo' function. */
#define HAVE_NL_LANGINFO 1
/* #undef HAVE_NL_LANGINFO */
/* Define to 1 if you have the `openat' function. */
#define HAVE_OPENAT 1
@ -1249,7 +1249,7 @@ typedef uint64_t uintmax_t;
#define HAVE_WORKING_FS_IOC_GETFLAGS 1
/* Define to 1 if you have the <zlib.h> header file. */
/* #undef HAVE_ZLIB_H */
#define HAVE_ZLIB_H 1
/* Define to 1 if you have the <zstd.h> header file. */
/* #undef HAVE_ZSTD_H */

View File

@ -41,6 +41,8 @@ RUN apt-get update -y \
zstd \
file \
pv \
zip \
p7zip-full \
&& apt-get clean
RUN pip3 install numpy scipy pandas Jinja2

View File

@ -116,6 +116,9 @@ private:
{
auto bytes_read = archive_read_data(handle.archive, internal_buffer.begin(), static_cast<int>(internal_buffer.size()));
if (bytes_read < 0)
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Failed to read file {} from {}: {}", filename, path_to_archive, archive_error_string(handle.archive));
if (!bytes_read)
return false;
@ -123,6 +126,7 @@ private:
working_buffer.resize(bytes_read);
return true;
}
Handle handle;
const String path_to_archive;
const String filename;
@ -166,7 +170,7 @@ LibArchiveReader<ArchiveInfo>::FileInfo LibArchiveReader<ArchiveInfo>::getFileIn
/// Always throws NOT_IMPLEMENTED: iterating over the files of an archive is not implemented by this reader.
/// The exception message names the archive kind via ArchiveInfo::name.
template <typename ArchiveInfo>
std::unique_ptr<typename LibArchiveReader<ArchiveInfo>::FileEnumerator> LibArchiveReader<ArchiveInfo>::firstFile()
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Iterating files not implemented for {} archives", ArchiveInfo::name);
}
template <typename ArchiveInfo>
@ -181,14 +185,14 @@ std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader<ArchiveInfo>::readFile(
/// Overload taking a file enumerator. Always throws NOT_IMPLEMENTED, because iterating over
/// the files of an archive is not implemented by this reader; the enumerator argument is unused.
template <typename ArchiveInfo>
std::unique_ptr<ReadBufferFromFileBase> LibArchiveReader<ArchiveInfo>::readFile(std::unique_ptr<FileEnumerator> /*enumerator*/)
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Iterating files not implemented for {} archives", ArchiveInfo::name);
}
/// Always throws NOT_IMPLEMENTED: iterating over the files of an archive is not implemented by this reader.
/// The read buffer argument is unused.
template <typename ArchiveInfo>
std::unique_ptr<typename LibArchiveReader<ArchiveInfo>::FileEnumerator>
LibArchiveReader<ArchiveInfo>::nextFile(std::unique_ptr<ReadBuffer> /*read_buffer*/)
{
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Iterating files not implemented for {} archives", ArchiveInfo::name);
}

View File

@ -32,7 +32,7 @@ std::shared_ptr<IArchiveReader> createArchiveReader(
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled");
#endif
}
else if (path_to_archive.ends_with(".tar"))
else if (path_to_archive.ends_with(".tar") || path_to_archive.ends_with("tar.gz"))
{
#if USE_LIBARCHIVE
return std::make_shared<TarArchiveReader>(path_to_archive, archive_read_function);

View File

@ -1,7 +1,51 @@
Reading 02661_select_from_table_in_archive_test_archive1.zip :: 02661_select_from_table_in_archive_test_data.csv
1 2
3 4
1 2
3 4
Reading 02661_select_from_table_in_archive_test_archive{1..2}.zip :: 02661_select_from_table_in_archive_test_data.csv
1
3
1
3
1
3
1
3
Reading 02661_select_from_table_in_archive_test_archive1.tar.gz :: 02661_select_from_table_in_archive_test_data.csv
1 2
3 4
1 2
3 4
Reading 02661_select_from_table_in_archive_test_archive{1..2}.tar.gz :: 02661_select_from_table_in_archive_test_data.csv
1
3
1
3
1
3
1
3
Reading 02661_select_from_table_in_archive_test_archive1.tar :: 02661_select_from_table_in_archive_test_data.csv
1 2
3 4
1 2
3 4
Reading 02661_select_from_table_in_archive_test_archive{1..2}.tar :: 02661_select_from_table_in_archive_test_data.csv
1
3
1
3
1
3
1
3
Reading 02661_select_from_table_in_archive_test_archive1.7z :: 02661_select_from_table_in_archive_test_data.csv
1 2
3 4
1 2
3 4
Reading 02661_select_from_table_in_archive_test_archive{1..2}.7z :: 02661_select_from_table_in_archive_test_data.csv
1
3
1

View File

@ -9,19 +9,52 @@ $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table"
user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
echo -e "1,2\n3,4" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv
zip ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
zip ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
function read_archive_file() {
    # Usage: read_archive_file <select-expression> <source spec under user_files_path>
    #
    # Prints a "Reading ..." header, then runs the same SELECT twice:
    #   1. through $CLICKHOUSE_LOCAL using the file() table function;
    #   2. through $CLICKHOUSE_CLIENT against a temporary File-engine table
    #      (02661_archive_table), which is dropped again afterwards.
    local select_expr="$1"
    local source_spec="$2"

    echo "Reading ${source_spec}"
    $CLICKHOUSE_LOCAL --query "SELECT ${select_expr} FROM file('${user_files_path}/${source_spec}')"
    $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/${source_spec}')"
    $CLICKHOUSE_CLIENT --query "SELECT ${select_expr} FROM 02661_archive_table"
    $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table"
}
read_archive_file "*" "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv"
read_archive_file "c1" "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..2}.zip :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv"
function run_archive_test() {
    # Usage: run_archive_test <archive extension, e.g. "zip">
    #
    # Reads the test CSV out of archive1.<ext> selecting every column, then out of
    # archive{1..2}.<ext> selecting only c1. The {1..2} text sits inside double
    # quotes, so the shell passes it to read_archive_file unexpanded.
    local ext="$1"
    local data_file="${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv"
    local archive_prefix="${CLICKHOUSE_TEST_UNIQUE_NAME}_archive"

    read_archive_file "*" "${archive_prefix}1.${ext} :: ${data_file}"
    read_archive_file "c1" "${archive_prefix}{1..2}.${ext} :: ${data_file}"
}
zip ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
zip ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
run_archive_test "zip"
rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.zip
tar -cvzf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.tar.gz ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
tar -cvzf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.tar.gz ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
run_archive_test "tar.gz"
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.tar.gz
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.tar.gz
tar -cvf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.tar ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
tar -cvf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.tar ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
run_archive_test "tar"
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.tar
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.tar
7z a ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.7z ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
7z a ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.7z ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
run_archive_test "7z"
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.7z
rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.7z
rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv