From 9fb86f134b8e72efc214e9089a523e8bd866ff18 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Mon, 31 Jul 2023 12:04:27 +0000
Subject: [PATCH] Fix tests

---
 src/Client/ClientBase.cpp                     |  1 +
 src/Storages/StorageFile.cpp                  | 49 ++++++++++++++++---
 src/Storages/StorageFile.h                    |  4 +-
 src/TableFunctions/ITableFunctionFileLike.cpp | 26 +----------
 src/TableFunctions/ITableFunctionFileLike.h   |  2 +-
 src/TableFunctions/TableFunctionFile.cpp      |  4 ++
 ...661_select_from_table_in_archive.reference |  7 ++-
 .../02661_select_from_table_in_archive.sh     | 25 ++++++++---
 8 files changed, 78 insertions(+), 40 deletions(-)

diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 06dabf96c28..407156698e5 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -1436,6 +1436,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
                 ConstraintsDescription{},
                 String{},
                 {},
+                String{},
             };
             StoragePtr storage = std::make_shared(in_file, global_context->getUserFilesPath(), args);
             storage->startup();
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 54ab2555b4e..d59c1b4a1b4 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -380,11 +380,11 @@ std::unique_ptr createReadBuffer(
     int table_fd,
     const String & compression_method,
     ContextPtr context,
-    const String & path_to_archive = "auto")
+    const String & path_to_archive = "")
 {
     CompressionMethod method;
 
-    if (path_to_archive != "auto")
+    if (!path_to_archive.empty())
     {
         auto reader = createArchiveReader(path_to_archive);
         std::unique_ptr in = reader->readFile(current_path);
@@ -591,7 +591,7 @@ StorageFile::StorageFile(int table_fd_, CommonArguments args)
 StorageFile::StorageFile(const std::string & table_path_, const std::string & user_files_path, CommonArguments args)
     : StorageFile(args)
 {
-    if (args.path_to_archive != "auto")
+    if (!args.path_to_archive.empty())
     {
         paths_to_archive = getPathsList(args.path_to_archive, user_files_path, args.getContext(), total_bytes_to_read);
         paths = {table_path_};
@@ -600,6 +600,7 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us
     {
         paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read);
     }
+
     is_db_table = false;
     is_path_with_globs = paths.size() > 1;
     if (!paths.empty())
@@ -822,8 +823,11 @@ public:
             if (!storage->use_table_fd)
             {
                 size_t current_file = 0, current_archive = 0;
-                if (files_info->files.size() == 1 && !files_info->paths_to_archive.empty())
+                if (!files_info->paths_to_archive.empty())
                 {
+                    if (files_info->files.size() != 1)
+                        throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Can only read a single file from archive");
+
                     current_archive = files_info->next_archive_to_read.fetch_add(1);
                     if (current_archive >= files_info->paths_to_archive.size())
                         return {};
@@ -1478,6 +1482,7 @@ void registerStorageFile(StorageFactory & factory)
                 factory_args.constraints,
                 factory_args.comment,
                 {},
+                {},
             };
 
             ASTs & engine_args_ast = factory_args.engine_args;
@@ -1548,7 +1553,7 @@ void registerStorageFile(StorageFactory & factory)
                 else if (type == Field::Types::UInt64)
                     source_fd = static_cast(literal->value.get());
                 else if (type == Field::Types::String)
-                    source_path = literal->value.get();
+                    StorageFile::parseFileSource(literal->value.get(), source_path, storage_args.path_to_archive);
                 else
                     throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor");
             }
@@ -1620,4 +1625,38 @@ void StorageFile::addColumnsToCache(
     schema_cache.addMany(cache_keys, columns);
 }
 
+/// Splits `source` at the first "::" into a path to an archive and a name of a file inside it.
+/// Spaces right before and right after "::" are dropped. If there is no "::", the whole `source`
+/// becomes `filename` and `path_to_archive` is not modified.
+/// Throws BAD_ARGUMENTS if the part before or after "::" is empty.
+void StorageFile::parseFileSource(String source, String & filename, String & path_to_archive)
+{
+    size_t pos = source.find("::");
+    if (pos == String::npos)
+    {
+        filename = std::move(source);
+        return;
+    }
+
+    std::string_view path_to_archive_view = std::string_view{source}.substr(0, pos);
+    /// back() on an empty view is undefined behaviour, so stop once nothing is left (e.g. "::file" or " ::file").
+    while (!path_to_archive_view.empty() && path_to_archive_view.back() == ' ')
+        path_to_archive_view.remove_suffix(1);
+
+    if (path_to_archive_view.empty())
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty");
+
+    path_to_archive = path_to_archive_view;
+
+    std::string_view filename_view = std::string_view{source}.substr(pos + 2);
+    /// Same for front(): nothing may follow the separator (e.g. "archive.zip::" or "archive.zip:: ").
+    while (!filename_view.empty() && filename_view.front() == ' ')
+        filename_view.remove_prefix(1);
+
+    if (filename_view.empty())
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty");
+
+    filename = filename_view;
+}
+
 }
diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index 10c234811ff..96dcfb4cb54 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -23,7 +23,7 @@ public:
         const ConstraintsDescription & constraints;
         const String & comment;
         const std::string rename_after_processing;
-        std::string path_to_archive = "auto";
+        std::string path_to_archive;
     };
 
     /// From file descriptor
@@ -93,6 +93,8 @@ public:
 
     static SchemaCache & getSchemaCache(const ContextPtr & context);
 
+    static void parseFileSource(String source, String & filename, String & path_to_archive);
+
 protected:
     friend class StorageFileSource;
     friend class StorageFileSink;
diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp
index baab7f5143e..2ddc0b6dba1 100644
--- a/src/TableFunctions/ITableFunctionFileLike.cpp
+++ b/src/TableFunctions/ITableFunctionFileLike.cpp
@@ -25,31 +25,7 @@ namespace ErrorCodes
 void ITableFunctionFileLike::parseFirstArguments(const ASTPtr & arg, const ContextPtr &)
 {
     String path = checkAndGetLiteralArgument(arg, "source");
-    size_t pos = path.find("::");
-    if (pos == String::npos)
-    {
-        filename = path;
-    }
-    else
-    {
-        std::string_view path_to_archive_view = std::string_view{path}.substr(0, pos);
-        while (path_to_archive_view.back() == ' ')
-            path_to_archive_view.remove_suffix(1);
-
-        if (path_to_archive_view.empty())
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty");
-
-        path_to_archive = path_to_archive_view;
-
-        std::string_view filename_view = std::string_view{path}.substr(pos + 2);
-        while (filename_view.front() == ' ')
-            filename_view.remove_prefix(1);
-
-        if (filename_view.empty())
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty");
-
-        filename = filename_view;
-    }
+    StorageFile::parseFileSource(std::move(path), filename, path_to_archive);
 }
 
 String ITableFunctionFileLike::getFormatFromFirstArgument()
diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h
index 81a38d53d8c..7f212cb60e9 100644
--- a/src/TableFunctions/ITableFunctionFileLike.h
+++ b/src/TableFunctions/ITableFunctionFileLike.h
@@ -42,7 +42,7 @@ protected:
     virtual String getFormatFromFirstArgument();
 
     String filename;
-    String path_to_archive = "auto";
+    String path_to_archive;
     String format = "auto";
     String structure = "auto";
     String compression_method = "auto";
diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp
index 1ba40e25bc0..3319d163410 100644
--- a/src/TableFunctions/TableFunctionFile.cpp
+++ b/src/TableFunctions/TableFunctionFile.cpp
@@ -39,6 +39,8 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr
             fd = STDOUT_FILENO;
         else if (filename == "stderr")
             fd = STDERR_FILENO;
+        else
+            StorageFile::parseFileSource(std::move(filename), filename, path_to_archive);
     }
     else if (type == Field::Types::Int64 || type == Field::Types::UInt64)
     {
@@ -78,6 +80,7 @@ StoragePtr TableFunctionFile::getStorage(const String & source,
         global_context->getSettingsRef().rename_files_after_processing,
         path_to_archive,
     };
+
     if (fd >= 0)
         return std::make_shared(fd, args);
 
@@ -98,6 +101,7 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context
             paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
         else
             paths_to_archives = StorageFile::getPathsList(path_to_archive, context->getUserFilesPath(), context, total_bytes_to_read);
+
         return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, paths_to_archives);
     }
 
diff --git a/tests/queries/0_stateless/02661_select_from_table_in_archive.reference b/tests/queries/0_stateless/02661_select_from_table_in_archive.reference
index 9869a226367..a13e8c56aa4 100644
--- a/tests/queries/0_stateless/02661_select_from_table_in_archive.reference
+++ b/tests/queries/0_stateless/02661_select_from_table_in_archive.reference
@@ -1,7 +1,12 @@
 1	2
 3	4
+1	2
+3	4
+1
+3
+1
+3
 1
 3
 1
 3
-
diff --git a/tests/queries/0_stateless/02661_select_from_table_in_archive.sh b/tests/queries/0_stateless/02661_select_from_table_in_archive.sh
index a0e165989ae..9366b7bd654 100755
--- a/tests/queries/0_stateless/02661_select_from_table_in_archive.sh
+++ b/tests/queries/0_stateless/02661_select_from_table_in_archive.sh
@@ -4,13 +4,24 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
 
-echo -e "1,2\n3,4" >${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv
-zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
-zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
+$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS 02661_archive_table"
 
-$CLICKHOUSE_LOCAL --query "SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv')"
-$CLICKHOUSE_LOCAL --query "SELECT c1 FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..2}.zip :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv')"
+user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+
+echo -e "1,2\n3,4" > ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv
+zip ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
+zip ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.zip ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv > /dev/null
+
+function read_archive_file() {
+    $CLICKHOUSE_LOCAL --query "SELECT $1 FROM file('${user_files_path}/$2')"
+    $CLICKHOUSE_CLIENT --query "CREATE TABLE 02661_archive_table Engine=File('CSV', '${user_files_path}/$2')"
+    $CLICKHOUSE_CLIENT --query "SELECT $1 FROM 02661_archive_table"
+    $CLICKHOUSE_CLIENT --query "DROP TABLE 02661_archive_table"
+}
+
+read_archive_file "*" "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv"
+read_archive_file "c1" "${CLICKHOUSE_TEST_UNIQUE_NAME}_archive{1..2}.zip :: ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv"
 
 rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_data.csv
-rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip
-rm ${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.zip
+rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive1.zip
+rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}_archive2.zip