allow file descriptor in table function file()

This commit is contained in:
wuxiaobai24 2022-04-23 11:01:27 +08:00
parent 5b74281a02
commit 751185a366
4 changed files with 111 additions and 0 deletions

View File

@ -7,10 +7,92 @@
#include <Storages/ColumnsDescription.h>
#include <Storages/StorageFile.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Formats/FormatFactory.h>
#include <Parsers/ASTIdentifier_fwd.h>
namespace DB
{
/// Error codes referenced in this translation unit (declared here, defined elsewhere).
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int LOGICAL_ERROR;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int UNKNOWN_IDENTIFIER;
}
void TableFunctionFile::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
/// Parse args
ASTs & args_func = ast_function->children;
if (args_func.size() != 1)
throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR);
ASTs & args = args_func.at(0)->children;
if (args.empty())
throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (auto opt_name = tryGetIdentifierName(args[0])) {
if (*opt_name == "stdin")
fd = STDIN_FILENO;
else if (*opt_name == "stdout")
fd = STDOUT_FILENO;
else if (*opt_name == "stderr")
fd = STDERR_FILENO;
else
throw Exception("Unknow identifier '" + *opt_name + "' in first second arguments", ErrorCodes::UNKNOWN_IDENTIFIER);
}
else if (const auto * literal = args[0]->as<ASTLiteral>()) {
auto type = literal->value.getType();
if (type == Field::Types::Int64)
fd = static_cast<int>(literal->value.get<Int64>());
else if (type == Field::Types::UInt64)
fd = static_cast<int>(literal->value.get<UInt64>());
else if (type == Field::Types::String) {
filename = literal->value.get<String>();
if (filename == "-")
fd = 0;
}
else
throw Exception("The second argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS);
}
if (args.size() > 1) {
args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context);
format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
}
if (format == "auto") {
if (fd >= 0)
format = FormatFactory::instance().getFormatFromFileDescriptor(fd);
else
format = FormatFactory::instance().getFormatFromFileName(filename, true);
}
if (args.size() <= 2)
return;
if (args.size() != 3 && args.size() != 4)
throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename (or file descriptor), format (default auto), structure (default auto) and compression method (default auto)",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context);
structure = args[2]->as<ASTLiteral &>().value.safeGet<String>();
if (structure.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Table structure is empty for table function '{}'. If you want to use automatic schema inference, use 'auto'",
ast_function->formatForErrorMessage());
if (args.size() == 4) {
args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(args[3], context);
compression_method = args[3]->as<ASTLiteral &>().value.safeGet<String>();
}
}
StoragePtr TableFunctionFile::getStorage(const String & source,
const String & format_, const ColumnsDescription & columns,
ContextPtr global_context, const std::string & table_name,
@ -28,6 +110,8 @@ StoragePtr TableFunctionFile::getStorage(const String & source,
ConstraintsDescription{},
String{},
};
if (fd >= 0)
return StorageFile::create(fd, args);
return StorageFile::create(source, global_context->getUserFilesPath(), args);
}

View File

@ -21,6 +21,10 @@ public:
}
ColumnsDescription getActualTableStructure(ContextPtr context) const override;
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
protected:
int fd = -1;
private:
StoragePtr getStorage(

View File

@ -0,0 +1,8 @@
1 2
3 4
1 2
3 4
1 2
3 4
1 2
3 4

View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Tags: no-parallel

# Feeds the same two-row CSV file to the table function file() through
# several spellings of a file descriptor: 0, stdin and '-' (all reading the
# redirected standard input), and descriptor 5 opened by the shell.

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

data_file=02286_data.csv
printf '1,2\n3,4\n' > "$data_file"

# Three equivalent ways to refer to the standard input.
for src in 0 stdin "'-'"; do
    $CLICKHOUSE_LOCAL --query "SELECT * FROM file($src, CSV)" < "$data_file"
done

# An arbitrary descriptor number, opened for reading by the shell.
$CLICKHOUSE_LOCAL --query "SELECT * FROM file(5, CSV)" 5< "$data_file"

rm "$data_file"