Merge pull request #36562 from wuxiaobai24/file_descriptor

allow file descriptor in table function file()
This commit is contained in:
Kruglov Pavel 2022-05-02 13:25:13 +02:00 committed by GitHub
commit fd980e6840
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 109 additions and 4 deletions

View File

@ -25,6 +25,17 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
void ITableFunctionFileLike::parseFirstArguments(const ASTPtr & arg, ContextPtr context)
{
auto ast = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
filename = ast->as<ASTLiteral &>().value.safeGet<String>();
}
/// Resolves the format from `filename`; parseArguments calls this when the format is "auto".
String ITableFunctionFileLike::getFormatFromFirstArgument()
{
    auto && format_factory = FormatFactory::instance();
    return format_factory.getFormatFromFileName(filename, true);
}
void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
/// Parse args
@ -38,16 +49,16 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context
if (args.empty())
throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
parseFirstArguments(args[0], context);
filename = args[0]->as<ASTLiteral &>().value.safeGet<String>();
for (size_t i = 1; i < args.size(); ++i)
args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context);
if (args.size() > 1)
format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
if (format == "auto")
format = FormatFactory::instance().getFormatFromFileName(filename, true);
format = getFormatFromFirstArgument();
if (args.size() <= 2)
return;

View File

@ -1,6 +1,7 @@
#pragma once
#include <TableFunctions/ITableFunction.h>
#include "Parsers/IAST_fwd.h"
namespace DB
{
@ -19,6 +20,8 @@ public:
protected:
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
/// Hook that interprets the first argument of the table function.
/// The base implementation evaluates `arg` with evaluateConstantExpressionOrIdentifierAsLiteral
/// and stores the resulting String in `filename`; subclasses may override it to accept other forms.
virtual void parseFirstArguments(const ASTPtr & arg, ContextPtr context);
/// Hook that supplies the format when `format` is "auto".
/// The base implementation asks FormatFactory to derive it from `filename`.
virtual String getFormatFromFirstArgument();
String filename;
String format = "auto";

View File

@ -1,16 +1,74 @@
#include <TableFunctions/TableFunctionFile.h>
#include <TableFunctions/parseColumnsListForTableFunction.h>
#include "Parsers/IAST_fwd.h"
#include "registerTableFunctions.h"
#include <Access/Common/AccessFlags.h>
#include <Interpreters/Context.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/StorageFile.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Formats/FormatFactory.h>
#include <Parsers/ASTIdentifier_fwd.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, ContextPtr context)
{
if (context->getApplicationType() != Context::ApplicationType::LOCAL)
{
ITableFunctionFileLike::parseFirstArguments(arg, context);
return;
}
if (auto opt_name = tryGetIdentifierName(arg))
{
if (*opt_name == "stdin")
fd = STDIN_FILENO;
else if (*opt_name == "stdout")
fd = STDOUT_FILENO;
else if (*opt_name == "stderr")
fd = STDERR_FILENO;
else
filename = *opt_name;
}
else if (const auto * literal = arg->as<ASTLiteral>())
{
auto type = literal->value.getType();
if (type == Field::Types::Int64 || type == Field::Types::UInt64)
{
fd = (type == Field::Types::Int64) ? static_cast<int>(literal->value.get<Int64>()) : static_cast<int>(literal->value.get<UInt64>());
if (fd < 0)
throw Exception("File descriptor must be non-negative", ErrorCodes::BAD_ARGUMENTS);
}
else if (type == Field::Types::String)
{
filename = literal->value.get<String>();
if (filename == "-")
fd = STDIN_FILENO;
}
else
throw Exception(
"The first argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS);
}
}
/// Detects the format automatically: from the file descriptor when one was given (`fd >= 0`),
/// otherwise from the file name.
String TableFunctionFile::getFormatFromFirstArgument()
{
    auto && format_factory = FormatFactory::instance();

    if (fd < 0)
        return format_factory.getFormatFromFileName(filename, true);

    return format_factory.getFormatFromFileDescriptor(fd);
}
StoragePtr TableFunctionFile::getStorage(const String & source,
const String & format_, const ColumnsDescription & columns,
ContextPtr global_context, const std::string & table_name,
@ -28,6 +86,8 @@ StoragePtr TableFunctionFile::getStorage(const String & source,
ConstraintsDescription{},
String{},
};
if (fd >= 0)
return StorageFile::create(fd, args);
return StorageFile::create(source, global_context->getUserFilesPath(), args);
}
@ -36,6 +96,9 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context
{
if (structure == "auto")
{
if (fd >= 0)
throw Exception(
"Schema inference is not supported for table function '" + getName() + "' with file descriptor", ErrorCodes::LOGICAL_ERROR);
size_t total_bytes_to_read = 0;
Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read);
return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context);

View File

@ -22,6 +22,11 @@ public:
ColumnsDescription getActualTableStructure(ContextPtr context) const override;
protected:
/// File descriptor selected by the first argument; stays -1 when none was given
/// (the code treats `fd >= 0` as "a descriptor is set").
int fd = -1;
/// When the application type is LOCAL, also accepts a descriptor number,
/// the identifiers stdin/stdout/stderr, and "-" in addition to a path.
void parseFirstArguments(const ASTPtr & arg, ContextPtr context) override;
/// Detects the format from `fd` when it is set, otherwise from `filename`.
String getFormatFromFirstArgument() override;
private:
StoragePtr getStorage(
const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context,

View File

@ -0,0 +1,8 @@
1 2
3 4
1 2
3 4
1 2
3 4
1 2
3 4

View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Tags: no-parallel

# Feeds the same CSV data to table function file() through a file descriptor,
# spelled as a number, as the `stdin` identifier and as '-'.

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

data_file=02286_data.csv
printf '1,2\n3,4\n' > "$data_file"

# Three spellings of standard input.
for first_arg in 0 stdin "'-'"; do
    $CLICKHOUSE_LOCAL --query "SELECT * FROM file($first_arg, CSV)" < "$data_file"
done

# An arbitrary descriptor opened by the shell.
$CLICKHOUSE_LOCAL --query "SELECT * FROM file(5, CSV)" 5< "$data_file"

rm "$data_file"