ClickHouse/src/TableFunctions/TableFunctionFile.cpp
Azat Khuzhin 0b37344b26 Use BAD_ARGUMENTS over LOGICAL_ERROR for schema inference error file() over fd
Otherwise the following leads to SIGSEGV in debug/sanitizers builds:

  echo '0000000000Custom NULL representation0000000000' | clickhouse-local -q "desc file('-', 'TSV')"

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-02-08 05:36:49 +01:00

106 lines
3.5 KiB
C++

#include <TableFunctions/TableFunctionFile.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include "Parsers/IAST_fwd.h"
#include "registerTableFunctions.h"
#include <Access/Common/AccessFlags.h>
#include <Interpreters/Context.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/StorageFile.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Formats/FormatFactory.h>
#include <Parsers/ASTIdentifier_fwd.h>
namespace DB
{
/// Error codes referenced by this translation unit; they are only declared here (extern).
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr & context)
{
if (context->getApplicationType() != Context::ApplicationType::LOCAL)
{
ITableFunctionFileLike::parseFirstArguments(arg, context);
return;
}
const auto * literal = arg->as<ASTLiteral>();
auto type = literal->value.getType();
if (type == Field::Types::String)
{
filename = literal->value.safeGet<String>();
if (filename == "stdin" || filename == "-")
fd = STDIN_FILENO;
else if (filename == "stdout")
fd = STDOUT_FILENO;
else if (filename == "stderr")
fd = STDERR_FILENO;
}
else if (type == Field::Types::Int64 || type == Field::Types::UInt64)
{
fd = static_cast<int>(
(type == Field::Types::Int64) ? literal->value.get<Int64>() : literal->value.get<UInt64>());
if (fd < 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "File descriptor must be non-negative");
}
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The first argument of table function '{}' mush be path or file descriptor", getName());
}
/// Determines the format from the first argument: by file descriptor when one is set
/// (fd >= 0), otherwise by the stored file name.
String TableFunctionFile::getFormatFromFirstArgument()
{
    auto & format_factory = FormatFactory::instance();

    /// No descriptor selected: fall back to detection by file name.
    if (fd < 0)
        return format_factory.getFormatFromFileName(filename, true);

    return format_factory.getFormatFromFileDescriptor(fd);
}
/// Creates the StorageFile backing this table function.
///
/// When a file descriptor is set (fd >= 0) the storage is built on top of it;
/// otherwise it is built from `source` together with the user files path of `global_context`.
StoragePtr TableFunctionFile::getStorage(
    const String & source,
    const String & format_,
    const ColumnsDescription & columns,
    ContextPtr global_context,
    const std::string & table_name,
    const std::string & compression_method_) const
{
    /// Format settings are deliberately left unset (std::nullopt) for the `file`
    /// table function, so that the ones from the query context are used.
    StorageFile::CommonArguments common_args{
        WithContext(global_context),
        StorageID(getDatabaseName(), table_name),
        format_,
        std::nullopt /*format settings*/,
        compression_method_,
        columns,
        ConstraintsDescription{},
        String{},
    };

    /// Path-based storage when no descriptor was selected.
    if (fd < 0)
        return std::make_shared<StorageFile>(source, global_context->getUserFilesPath(), common_args);

    return std::make_shared<StorageFile>(fd, common_args);
}
/// Returns the table structure.
///
/// An explicitly specified structure is parsed from its string form. When the structure
/// is "auto", it is obtained from the files matching `filename`; this is rejected with
/// BAD_ARGUMENTS when the table function works over a file descriptor (fd >= 0).
ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context) const
{
    /// Explicit structure: nothing to infer.
    if (structure != "auto")
        return parseColumnsListFromString(structure, context);

    if (fd >= 0)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Schema inference is not supported for table function '{}' with file descriptor", getName());

    /// Filled by getPathsList as an output argument; its value is not used afterwards here.
    size_t bytes_to_read = 0;
    Strings file_paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, bytes_to_read);

    return StorageFile::getTableStructureFromFile(format, file_paths, compression_method, std::nullopt, context);
}
/// Registers TableFunctionFile in the given table function factory.
void registerTableFunctionFile(TableFunctionFactory & factory)
{
factory.registerFunction<TableFunctionFile>();
}
}