From 751185a366307082beb142a7d07470d7efe39161 Mon Sep 17 00:00:00 2001 From: wuxiaobai24 Date: Sat, 23 Apr 2022 11:01:27 +0800 Subject: [PATCH 1/4] allow file descriptor in table function file() --- src/TableFunctions/TableFunctionFile.cpp | 84 +++++++++++++++++++ src/TableFunctions/TableFunctionFile.h | 4 + ...escriptor_in_table_function_file.reference | 8 ++ ..._file_descriptor_in_table_function_file.sh | 15 ++++ 4 files changed, 111 insertions(+) create mode 100644 tests/queries/0_stateless/02286_use_file_descriptor_in_table_function_file.reference create mode 100755 tests/queries/0_stateless/02286_use_file_descriptor_in_table_function_file.sh diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index b09bb8b6ae1..78dfaeefafe 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -7,10 +7,92 @@ #include #include #include +#include +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_IDENTIFIER; + extern const int LOGICAL_ERROR; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +void TableFunctionFile::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + /// Parse args + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + + ASTs & args = args_func.at(0)->children; + + if (args.empty()) + throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (auto opt_name = tryGetIdentifierName(args[0])) { + if (*opt_name == "stdin") + fd = STDIN_FILENO; + else if (*opt_name == "stdout") + fd = STDOUT_FILENO; + else if (*opt_name == "stderr") + fd = STDERR_FILENO; + else + throw Exception("Unknow identifier '" + *opt_name + "' in first second arguments", ErrorCodes::UNKNOWN_IDENTIFIER); + } + else if (const auto * literal = args[0]->as()) { + auto type = literal->value.getType(); + if (type == Field::Types::Int64) + fd = static_cast(literal->value.get()); + else if (type == Field::Types::UInt64) + fd = static_cast(literal->value.get()); + else if (type == Field::Types::String) { + filename = literal->value.get(); + if (filename == "-") + fd = 0; + } + else + throw Exception("The second argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); + } + + if (args.size() > 1) { + args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); + format = args[1]->as().value.safeGet(); + } + + if (format == "auto") { + if (fd >= 0) + format = FormatFactory::instance().getFormatFromFileDescriptor(fd); + else + format = FormatFactory::instance().getFormatFromFileName(filename, true); + } + + if (args.size() <= 2) + return; + + if (args.size() != 3 && args.size() != 4) + throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename (or file descriptor), format (default auto), structure (default auto) and compression method (default auto)", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); + structure = args[2]->as().value.safeGet(); + + if (structure.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Table structure is empty for table function '{}'. If you want to use automatic schema inference, use 'auto'", + ast_function->formatForErrorMessage()); + + if (args.size() == 4) { + args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(args[3], context); + compression_method = args[3]->as().value.safeGet(); + } +} + StoragePtr TableFunctionFile::getStorage(const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, const std::string & table_name, @@ -28,6 +110,8 @@ StoragePtr TableFunctionFile::getStorage(const String & source, ConstraintsDescription{}, String{}, }; + if (fd >= 0) + return StorageFile::create(fd, args); return StorageFile::create(source, global_context->getUserFilesPath(), args); } diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index f26e4a9c06d..809cbeef9c3 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -21,6 +21,10 @@ public: } ColumnsDescription getActualTableStructure(ContextPtr context) const override; + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + +protected: + int fd = -1; private: StoragePtr getStorage( diff --git a/tests/queries/0_stateless/02286_use_file_descriptor_in_table_function_file.reference b/tests/queries/0_stateless/02286_use_file_descriptor_in_table_function_file.reference new file mode 100644 index 00000000000..b4f55f2fac4 --- /dev/null +++ b/tests/queries/0_stateless/02286_use_file_descriptor_in_table_function_file.reference @@ -0,0 +1,8 @@ +1 2 +3 4 +1 2 +3 4 +1 2 +3 4 +1 2 +3 4 diff --git a/tests/queries/0_stateless/02286_use_file_descriptor_in_table_function_file.sh b/tests/queries/0_stateless/02286_use_file_descriptor_in_table_function_file.sh new file mode 100755 index 00000000000..71067d86729 --- /dev/null +++ b/tests/queries/0_stateless/02286_use_file_descriptor_in_table_function_file.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo -e "1,2\n3,4" > 02286_data.csv + +$CLICKHOUSE_LOCAL --query "SELECT * FROM file(0, CSV)" < 02286_data.csv +$CLICKHOUSE_LOCAL --query "SELECT * FROM file(stdin, CSV)" < 02286_data.csv +$CLICKHOUSE_LOCAL --query "SELECT * FROM file('-', CSV)" < 02286_data.csv +$CLICKHOUSE_LOCAL --query "SELECT * FROM file(5, CSV)" 5< 02286_data.csv + +rm 02286_data.csv From a70d65647eb5f2b61620eb3acfe11040bb7b6556 Mon Sep 17 00:00:00 2001 From: wuxiaobai24 Date: Wed, 27 Apr 2022 20:15:52 +0800 Subject: [PATCH 2/4] reimplement --- src/TableFunctions/ITableFunctionFileLike.cpp | 19 ++++- src/TableFunctions/ITableFunctionFileLike.h | 3 + src/TableFunctions/TableFunctionFile.cpp | 74 +++++++------------ src/TableFunctions/TableFunctionFile.h | 3 +- 4 files changed, 46 insertions(+), 53 deletions(-) diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index cf4a62a30f4..7fa3ccda195 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -25,6 +25,17 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +void ITableFunctionFileLike::parseFirstArguments(const ASTPtr & arg, ContextPtr context) +{ + auto ast = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + filename = ast->as().value.safeGet(); +} + +String ITableFunctionFileLike::getFormatFromFirstArgument() +{ + return FormatFactory::instance().getFormatFromFileName(filename, true); +} + void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context) { /// Parse args @@ -38,16 +49,16 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context if (args.empty()) throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - for (auto & arg : args) - arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + parseFirstArguments(args[0], context); - filename = args[0]->as().value.safeGet(); + for (size_t i = 1; i < args.size(); ++i) + args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); if (args.size() > 1) format = args[1]->as().value.safeGet(); if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(filename, true); + format = getFormatFromFirstArgument(); if (args.size() <= 2) return; diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index cd85f20fdc0..88ad75b1018 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -1,6 +1,7 @@ #pragma once #include +#include "Parsers/IAST_fwd.h" namespace DB { @@ -19,6 +20,8 @@ public: protected: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + virtual void parseFirstArguments(const ASTPtr & arg, ContextPtr context); + virtual String getFormatFromFirstArgument(); String filename; String format = "auto"; diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 78dfaeefafe..b078d07b934 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -1,6 +1,7 @@ #include #include +#include "Parsers/IAST_fwd.h" #include "registerTableFunctions.h" #include #include @@ -18,24 +19,19 @@ namespace ErrorCodes { extern const int UNKNOWN_IDENTIFIER; extern const int LOGICAL_ERROR; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int BAD_ARGUMENTS; } -void TableFunctionFile::parseArguments(const ASTPtr & ast_function, ContextPtr context) +void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, ContextPtr context) { - /// Parse args - ASTs & args_func = ast_function->children; + if (context->getApplicationType() != Context::ApplicationType::LOCAL) + { + ITableFunctionFileLike::parseFirstArguments(arg, context); + return; + } - if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); - - ASTs & args = args_func.at(0)->children; - - if (args.empty()) - throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - if (auto opt_name = tryGetIdentifierName(args[0])) { + if (auto opt_name = tryGetIdentifierName(arg)) + { if (*opt_name == "stdin") fd = STDIN_FILENO; else if (*opt_name == "stdout") @@ -43,54 +39,33 @@ void TableFunctionFile::parseArguments(const ASTPtr & ast_function, ContextPtr c else if (*opt_name == "stderr") fd = STDERR_FILENO; else - throw Exception("Unknow identifier '" + *opt_name + "' in first second arguments", ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception("Unknown identifier '" + *opt_name + "' in first arguments", ErrorCodes::UNKNOWN_IDENTIFIER); } - else if (const auto * literal = args[0]->as()) { + else if (const auto * literal = arg->as()) + { auto type = literal->value.getType(); if (type == Field::Types::Int64) fd = static_cast(literal->value.get()); else if (type == Field::Types::UInt64) fd = static_cast(literal->value.get()); - else if (type == Field::Types::String) { + else if (type == Field::Types::String) + { filename = literal->value.get(); if (filename == "-") fd = 0; } else - throw Exception("The second argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); + throw Exception( + "The second argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); } +} - if (args.size() > 1) { - args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); - format = args[1]->as().value.safeGet(); - } - - if (format == "auto") { - if (fd >= 0) - format = FormatFactory::instance().getFormatFromFileDescriptor(fd); - else - format = FormatFactory::instance().getFormatFromFileName(filename, true); - } - - if (args.size() <= 2) - return; - - if (args.size() != 3 && args.size() != 4) - throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename (or file descriptor), format (default auto), structure (default auto) and compression method (default auto)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); - structure = args[2]->as().value.safeGet(); - - if (structure.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Table structure is empty for table function '{}'. If you want to use automatic schema inference, use 'auto'", - ast_function->formatForErrorMessage()); - - if (args.size() == 4) { - args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(args[3], context); - compression_method = args[3]->as().value.safeGet(); - } +String TableFunctionFile::getFormatFromFirstArgument() +{ + if (fd >= 0) + return FormatFactory::instance().getFormatFromFileDescriptor(fd); + else + return FormatFactory::instance().getFormatFromFileName(filename, true); } StoragePtr TableFunctionFile::getStorage(const String & source, @@ -120,6 +95,9 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context { if (structure == "auto") { + if (fd >= 0) + throw Exception( + "Schema inference is not supported for table function '" + getName() + "' with file descriptor", ErrorCodes::LOGICAL_ERROR); size_t total_bytes_to_read = 0; Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context); diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index 809cbeef9c3..f956043e69a 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -21,10 +21,11 @@ public: } ColumnsDescription getActualTableStructure(ContextPtr context) const override; - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; protected: int fd = -1; + void parseFirstArguments(const ASTPtr & arg, ContextPtr context) override; + String getFormatFromFirstArgument() override; private: StoragePtr getStorage( From 6fa0c90955a354b8285fb85cd4e06d3d76000abf Mon Sep 17 00:00:00 2001 From: wuxiaobai24 Date: Thu, 28 Apr 2022 20:47:49 +0800 Subject: [PATCH 3/4] Update src/TableFunctions/TableFunctionFile.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/TableFunctions/TableFunctionFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index b078d07b934..d359f95d219 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -56,7 +56,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, ContextPtr conte } else throw Exception( - "The second argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); + "The first argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); } } From b28d7d513067abe29cb4235a058e10c7160c1a4f Mon Sep 17 00:00:00 2001 From: wuxiaobai24 Date: Thu, 28 Apr 2022 20:55:57 +0800 Subject: [PATCH 4/4] fix --- src/TableFunctions/TableFunctionFile.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index d359f95d219..146e59a8265 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { - extern const int UNKNOWN_IDENTIFIER; extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; } @@ -39,20 +38,22 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, ContextPtr conte else if (*opt_name == "stderr") fd = STDERR_FILENO; else - throw Exception("Unknown identifier '" + *opt_name + "' in first arguments", ErrorCodes::UNKNOWN_IDENTIFIER); + filename = *opt_name; } else if (const auto * literal = arg->as()) { auto type = literal->value.getType(); - if (type == Field::Types::Int64) - fd = static_cast(literal->value.get()); - else if (type == Field::Types::UInt64) - fd = static_cast(literal->value.get()); + if (type == Field::Types::Int64 || type == Field::Types::UInt64) + { + fd = (type == Field::Types::Int64) ? static_cast(literal->value.get()) : static_cast(literal->value.get()); + if (fd < 0) + throw Exception("File descriptor must be non-negative", ErrorCodes::BAD_ARGUMENTS); + } else if (type == Field::Types::String) { filename = literal->value.get(); if (filename == "-") - fd = 0; + fd = STDIN_FILENO; } else throw Exception(