Fix schema inference with named collection, refactor Cluster table functions

This commit is contained in:
avogar 2023-05-12 13:58:45 +00:00
parent 3ee8de792c
commit 70a8fd2c50
30 changed files with 572 additions and 382 deletions

View File

@ -15,12 +15,14 @@
#include <Processors/Sources/RemoteSource.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/queryToString.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/IStorage.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/addColumnsStructureToQueryWithClusterEngine.h>
#include <Storages/extractTableFunctionArgumentsFromSelectQuery.h>
#include <TableFunctions/TableFunctionHDFSCluster.h>
#include <memory>
@ -59,13 +61,17 @@ StorageHDFSCluster::StorageHDFSCluster(
setInMemoryMetadata(storage_metadata);
}
void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure)
void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context)
{
addColumnsStructureToQueryWithHDFSClusterEngine(query, structure);
ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query);
if (!expression_list)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function hdfsCluster, got '{}'", queryToString(query));
TableFunctionHDFSCluster::addColumnsStructureToArguments(expression_list->children, structure, context);
}
RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr, ContextPtr context) const
RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr, const ContextPtr & context) const
{
auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(context, uri);
auto callback = std::make_shared<HDFSSource::IteratorWrapper>([iter = std::move(iterator)]() mutable -> String { return iter->next(); });

View File

@ -35,10 +35,10 @@ public:
NamesAndTypesList getVirtuals() const override;
RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const override;
RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
private:
void addColumnsStructureToQuery(ASTPtr & query, const String & structure) override;
void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override;
String uri;
String format_name;

View File

@ -77,7 +77,7 @@ Pipe IStorageCluster::read(
const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
if (!structure_argument_was_provided)
addColumnsStructureToQuery(query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()));
addColumnsStructureToQuery(query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), context);
RestoreQualifiedNamesVisitor::Data data;
data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query_info.query->as<ASTSelectQuery &>(), 0));
@ -89,7 +89,7 @@ Pipe IStorageCluster::read(
/* only_replace_in_join_= */true);
visitor.visit(query_to_send);
auto new_context = updateSettingsForTableFunctionCluster(context, context->getSettingsRef());
auto new_context = updateSettings(context, context->getSettingsRef());
const auto & current_settings = new_context->getSettingsRef();
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
for (const auto & shard_info : cluster->getShardsInfo())
@ -129,7 +129,7 @@ QueryProcessingStage::Enum IStorageCluster::getQueryProcessingStage(
return QueryProcessingStage::Enum::FetchColumns;
}
ContextPtr IStorageCluster::updateSettingsForTableFunctionCluster(ContextPtr context, const Settings & settings)
ContextPtr IStorageCluster::updateSettings(ContextPtr context, const Settings & settings)
{
Settings new_settings = settings;

View File

@ -3,6 +3,7 @@
#include <Storages/IStorage.h>
#include <Interpreters/Cluster.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
#include <Parsers/ASTExpressionList.h>
namespace DB
{
@ -21,12 +22,18 @@ public:
Poco::Logger * log_,
bool structure_argument_was_provided_);
Pipe read(const Names &, const StorageSnapshotPtr &, SelectQueryInfo &,
ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, size_t /*num_streams*/) override;
Pipe read(
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr context,
QueryProcessingStage::Enum processed_stage,
size_t /*max_block_size*/,
size_t /*num_streams*/) override;
ClusterPtr getCluster(ContextPtr context) const;
/// Query is needed for pruning by virtual columns (_file, _path)
virtual RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const = 0;
virtual RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const = 0;
QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override;
@ -35,14 +42,14 @@ public:
protected:
virtual void updateBeforeRead(const ContextPtr &) {}
virtual void addColumnsStructureToQuery(ASTPtr & query, const String & structure) = 0;
virtual void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) = 0;
private:
ContextPtr updateSettings(ContextPtr context, const Settings & settings);
Poco::Logger * log;
String cluster_name;
bool structure_argument_was_provided;
ContextPtr updateSettingsForTableFunctionCluster(ContextPtr context, const Settings & settings);
};

View File

@ -4,19 +4,22 @@
#if USE_AWS_S3
#include "Common/Exception.h"
#include <DataTypes/DataTypeString.h>
#include <IO/ConnectionTimeouts.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/AddDefaultDatabaseVisitor.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Processors/Sources/RemoteSource.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
#include <Storages/IStorage.h>
#include <Storages/StorageURL.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/getVirtualsForStorage.h>
#include <Storages/StorageDictionary.h>
#include <Storages/addColumnsStructureToQueryWithClusterEngine.h>
#include <Storages/extractTableFunctionArgumentsFromSelectQuery.h>
#include <Storages/getVirtualsForStorage.h>
#include <Common/Exception.h>
#include <Parsers/queryToString.h>
#include <TableFunctions/TableFunctionS3Cluster.h>
#include <memory>
#include <string>
@ -61,9 +64,13 @@ StorageS3Cluster::StorageS3Cluster(
virtual_block.insert({column.type->createColumn(), column.type, column.name});
}
void StorageS3Cluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure)
void StorageS3Cluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context)
{
addColumnsStructureToQueryWithS3ClusterEngine(query, structure);
ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query);
if (!expression_list)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query));
TableFunctionS3Cluster::addColumnsStructureToArguments(expression_list->children, structure, context);
}
void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context)
@ -71,7 +78,7 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context)
s3_configuration.update(local_context);
}
RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, ContextPtr context) const
RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
{
auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(
*s3_configuration.client, s3_configuration.url, query, virtual_block, context);

View File

@ -34,7 +34,7 @@ public:
NamesAndTypesList getVirtuals() const override;
RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const override;
RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
protected:
void updateConfigurationIfChanged(ContextPtr local_context);
@ -42,7 +42,7 @@ protected:
private:
void updateBeforeRead(const ContextPtr & context) override { updateConfigurationIfChanged(context); }
void addColumnsStructureToQuery(ASTPtr & query, const String & structure) override;
void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override;
StorageS3::Configuration s3_configuration;
NamesAndTypesList virtual_columns;

View File

@ -13,13 +13,14 @@
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Processors/Sources/RemoteSource.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/queryToString.h>
#include <Storages/IStorage.h>
#include <Storages/StorageURL.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/StorageDictionary.h>
#include <Storages/addColumnsStructureToQueryWithClusterEngine.h>
#include <Storages/StorageURL.h>
#include <Storages/extractTableFunctionArgumentsFromSelectQuery.h>
#include <TableFunctions/TableFunctionURLCluster.h>
#include <memory>
@ -62,12 +63,16 @@ StorageURLCluster::StorageURLCluster(
setInMemoryMetadata(storage_metadata);
}
void StorageURLCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure)
void StorageURLCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context)
{
addColumnsStructureToQueryWithURLClusterEngine(query, structure);
ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query);
if (!expression_list)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function urlCluster, got '{}'", queryToString(query));
TableFunctionURLCluster::addColumnsStructureToArguments(expression_list->children, structure, context);
}
RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(ASTPtr, ContextPtr context) const
RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(ASTPtr, const ContextPtr & context) const
{
auto iterator = std::make_shared<StorageURLSource::DisclosedGlobIterator>(uri, context->getSettingsRef().glob_expansion_max_elements);
auto callback = std::make_shared<TaskIterator>([iter = std::move(iterator)]() mutable -> String { return iter->next(); });

View File

@ -34,10 +34,10 @@ public:
NamesAndTypesList getVirtuals() const override;
RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const override;
RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
private:
void addColumnsStructureToQuery(ASTPtr & query, const String & structure) override;
void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override;
String uri;
String format_name;

View File

@ -0,0 +1,29 @@
#include <Storages/extractTableFunctionArgumentsFromSelectQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/queryToString.h>
namespace DB
{
ASTExpressionList * extractTableFunctionArgumentsFromSelectQuery(ASTPtr & query)
{
auto * select_query = query->as<ASTSelectQuery>();
if (!select_query || !select_query->tables())
return nullptr;
auto * tables = select_query->tables()->as<ASTTablesInSelectQuery>();
auto * table_expression = tables->children[0]->as<ASTTablesInSelectQueryElement>()->table_expression->as<ASTTableExpression>();
if (!table_expression->table_function)
return nullptr;
auto * table_function = table_expression->table_function->as<ASTFunction>();
return table_function->arguments->as<ASTExpressionList>();
}
}

View File

@ -0,0 +1,11 @@
#pragma once
#include <Parsers/IAST.h>
#include <Parsers/ASTExpressionList.h>
namespace DB
{
ASTExpressionList * extractTableFunctionArgumentsFromSelectQuery(ASTPtr & query);
}

View File

@ -0,0 +1,3 @@
//
// Created by Павел Круглов on 12/05/2023.
//

View File

@ -0,0 +1,73 @@
#pragma once
#include "config.h"
#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/TableFunctionS3.h>
#include <Storages/StorageS3Cluster.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Storages/checkAndGetLiteralArgument.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int BAD_GET;
extern const int LOGICAL_ERROR;
}
class Context;
/// Base class for *Cluster table functions that require cluster_name for the first argument.
template <typename Base>
class ITableFunctionCluster : public Base
{
public:
String getName() const override = 0;
String getSignature() const override = 0;
static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context)
{
if (args.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected empty list of arguments for {}Cluster table function", Base::name);
ASTPtr cluster_name_arg = args.front();
args.erase(args.begin());
Base::addColumnsStructureToArguments(args, desired_structure, context);
args.insert(args.begin(), cluster_name_arg);
}
protected:
void parseArguments(const ASTPtr & ast, ContextPtr context) override
{
/// Clone ast function, because we can modify its arguments like removing cluster_name
Base::parseArguments(ast->clone(), context);
}
void parseArgumentsImpl(ASTs & args, const ContextPtr & context) override
{
if (args.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The signature of table function {} shall be the following:\n{}", getName(), getSignature());
/// Evaluate only first argument, everything else will be done Base class
args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context);
/// Cluster name is always the first
cluster_name = checkAndGetLiteralArgument<String>(args[0], "cluster_name");
if (!context->tryGetCluster(cluster_name))
throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name);
/// Just cut the first arg (cluster_name) and try to parse other table function arguments as is
args.erase(args.begin());
Base::parseArgumentsImpl(args, context);
}
String cluster_name;
};
}

View File

@ -19,7 +19,7 @@ namespace ErrorCodes
}
template <typename Name, typename Storage, typename TableFunction>
class ITableFunctionDataLake : public ITableFunction
class ITableFunctionDataLake : public TableFunction
{
public:
static constexpr auto name = Name::name;
@ -33,11 +33,11 @@ protected:
ColumnsDescription /*cached_columns*/) const override
{
ColumnsDescription columns;
if (configuration.structure != "auto")
columns = parseColumnsListFromString(configuration.structure, context);
if (TableFunction::configuration.structure != "auto")
columns = parseColumnsListFromString(TableFunction::configuration.structure, context);
StoragePtr storage = std::make_shared<Storage>(
configuration, context, StorageID(getDatabaseName(), table_name),
TableFunction::configuration, context, StorageID(TableFunction::getDatabaseName(), table_name),
columns, ConstraintsDescription{}, String{}, std::nullopt);
storage->startup();
@ -48,34 +48,21 @@ protected:
ColumnsDescription getActualTableStructure(ContextPtr context) const override
{
if (configuration.structure == "auto")
if (TableFunction::configuration.structure == "auto")
{
context->checkAccess(getSourceAccessType());
return Storage::getTableStructureFromData(configuration, std::nullopt, context);
context->checkAccess(TableFunction::getSourceAccessType());
return Storage::getTableStructureFromData(TableFunction::configuration, std::nullopt, context);
}
return parseColumnsListFromString(configuration.structure, context);
return parseColumnsListFromString(TableFunction::configuration.structure, context);
}
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override
{
ASTs & args_func = ast_function->children;
const auto message = fmt::format(
"The signature of table function '{}' could be the following:\n{}", getName(), TableFunction::signature);
if (args_func.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName());
auto & args = args_func.at(0)->children;
TableFunction::parseArgumentsImpl(message, args, context, configuration, false);
if (configuration.format == "auto")
configuration.format = "Parquet";
/// Set default format to Parquet if it's not specified in arguments.
TableFunction::configuration.format = "Parquet";
TableFunction::parseArguments(ast_function, context);
}
mutable typename Storage::Configuration configuration;
};
}

View File

@ -47,9 +47,13 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments.", getName());
ASTs & args = args_func.at(0)->children;
parseArgumentsImpl(args, context);
}
if (args.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires at least 1 argument", getName());
void ITableFunctionFileLike::parseArgumentsImpl(ASTs & args, const ContextPtr & context)
{
if (args.empty() || args.size() > 4)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The signature of table function {} shall be the following:\n{}", getName(), getSignature());
for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
@ -62,26 +66,51 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context
if (format == "auto")
format = getFormatFromFirstArgument();
if (args.size() <= 2)
return;
if (args.size() > 2)
{
structure = checkAndGetLiteralArgument<String>(args[2], "structure");
if (structure.empty())
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Table structure is empty for table function '{}'. If you want to use automatic schema inference, use 'auto'",
getName());
}
if (args.size() != 3 && args.size() != 4)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Table function '{}' requires 1, 2, 3 or 4 arguments: "
"filename, format (default auto), structure (default auto) and compression method (default auto)",
getName());
structure = checkAndGetLiteralArgument<String>(args[2], "structure");
if (structure.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Table structure is empty for table function '{}'. If you want to use automatic schema inference, use 'auto'",
ast_function->formatForErrorMessage());
if (args.size() == 4)
if (args.size() > 3)
compression_method = checkAndGetLiteralArgument<String>(args[3], "compression_method");
}
void ITableFunctionFileLike::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr &)
{
if (args.empty() || args.size() > getMaxNumberOfArguments())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), args.size());
auto structure_literal = std::make_shared<ASTLiteral>(structure);
/// f(filename)
if (args.size() == 1)
{
/// Add format=auto before structure argument.
args.push_back(std::make_shared<ASTLiteral>("auto"));
args.push_back(structure_literal);
}
/// f(filename, format)
else if (args.size() == 2)
{
args.push_back(structure_literal);
}
/// f(filename, format, 'auto')
else if (args.size() == 3)
{
args.back() = structure_literal;
}
/// f(filename, format, 'auto', compression)
else if (args.size() == 4)
{
args[args.size() - 2] = structure_literal;
}
}
StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const
{
ColumnsDescription columns;

View File

@ -9,19 +9,35 @@ class ColumnsDescription;
class Context;
/*
* function(source, format, structure[, compression_method]) - creates a temporary storage from formatted source
* function(source, [format, structure, compression_method]) - creates a temporary storage from formatted source
*/
class ITableFunctionFileLike : public ITableFunction
{
public:
static constexpr auto signature = " - filename\n"
" - filename, format\n"
" - filename, format, structure\n"
" - filename, format, structure, compression_method\n";
virtual String getSignature() const
{
return signature;
}
bool needStructureHint() const override { return structure == "auto"; }
void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; }
bool supportsReadingSubsetOfColumns() override;
static size_t getMaxNumberOfArguments() { return 4; }
static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr &);
protected:
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context);
virtual void parseFirstArguments(const ASTPtr & arg, const ContextPtr & context);
virtual String getFormatFromFirstArgument();

View File

@ -12,18 +12,28 @@ namespace DB
class Context;
/* hdfs(URI, format[, structure, compression]) - creates a temporary storage from hdfs files
/* hdfs(URI, [format, structure, compression]) - creates a temporary storage from hdfs files
*
*/
class TableFunctionHDFS : public ITableFunctionFileLike
{
public:
static constexpr auto name = "hdfs";
std::string getName() const override
static constexpr auto signature = " - uri\n"
" - uri, format\n"
" - uri, format, structure\n"
" - uri, format, structure, compression_method\n";
String getName() const override
{
return name;
}
String getSignature() const override
{
return signature;
}
ColumnsDescription getActualTableStructure(ContextPtr context) const override;
std::unordered_set<String> getVirtualsToCheckBeforeUsingStructureHint() const override

View File

@ -2,86 +2,19 @@
#if USE_HDFS
#include <Storages/HDFS/StorageHDFSCluster.h>
#include <DataTypes/DataTypeString.h>
#include <Storages/HDFS/StorageHDFS.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/Context.h>
#include <Interpreters/ClientInfo.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/TableFunctionHDFS.h>
#include <TableFunctions/TableFunctionHDFSCluster.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <Access/Common/AccessFlags.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/IAST_fwd.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Storages/HDFS/StorageHDFSCluster.h>
#include <Storages/HDFS/StorageHDFS.h>
#include "registerTableFunctions.h"
#include <memory>
#include <thread>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_GET;
}
void TableFunctionHDFSCluster::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
auto ast_copy = ast_function->clone();
/// Parse args
ASTs & args_func = ast_copy->children;
if (args_func.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName());
ASTs & args = args_func.at(0)->children;
if (args.size() < 2 || args.size() > 5)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"The signature of table function {} shall be the following:\n"
" - cluster, uri\n"
" - cluster, uri, format\n"
" - cluster, uri, format, structure\n"
" - cluster, uri, format, structure, compression_method",
getName());
for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
/// This argument is always the first
cluster_name = checkAndGetLiteralArgument<String>(args[0], "cluster_name");
if (!context->tryGetCluster(cluster_name))
throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name);
/// Just cut the first arg (cluster_name) and try to parse other table function arguments as is
args.erase(args.begin());
ITableFunctionFileLike::parseArguments(ast_copy, context);
}
ColumnsDescription TableFunctionHDFSCluster::getActualTableStructure(ContextPtr context) const
{
if (structure == "auto")
{
context->checkAccess(getSourceAccessType());
return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context);
}
return parseColumnsListFromString(structure, context);
}
StoragePtr TableFunctionHDFSCluster::getStorage(
const String & /*source*/, const String & /*format_*/, const ColumnsDescription & columns, ContextPtr context,
const std::string & table_name, const String & /*compression_method_*/) const

View File

@ -5,6 +5,8 @@
#if USE_HDFS
#include <TableFunctions/ITableFunctionFileLike.h>
#include <TableFunctions/TableFunctionHDFS.h>
#include <TableFunctions/ITableFunctionCluster.h>
namespace DB
@ -20,28 +22,31 @@ class Context;
* On worker node it asks initiator about next task to process, processes it.
* This is repeated until the tasks are finished.
*/
class TableFunctionHDFSCluster : public ITableFunctionFileLike
class TableFunctionHDFSCluster : public ITableFunctionCluster<TableFunctionHDFS>
{
public:
static constexpr auto name = "hdfsCluster";
std::string getName() const override
static constexpr auto signature = " - cluster_name, uri\n"
" - cluster_name, uri, format\n"
" - cluster_name, uri, format, structure\n"
" - cluster_name, uri, format, structure, compression_method\n";
String getName() const override
{
return name;
}
String getSignature() const override
{
return signature;
}
protected:
StoragePtr getStorage(
const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context,
const std::string & table_name, const String & compression_method_) const override;
const char * getStorageTypeName() const override { return "HDFSCluster"; }
AccessType getSourceAccessType() const override { return AccessType::HDFS; }
ColumnsDescription getActualTableStructure(ContextPtr) const override;
void parseArguments(const ASTPtr &, ContextPtr) override;
String cluster_name;
};
}

View File

@ -10,13 +10,14 @@
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <Access/Common/AccessFlags.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/StorageS3.h>
#include <Storages/StorageURL.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Formats/FormatFactory.h>
#include "registerTableFunctions.h"
#include <filesystem>
#include <boost/algorithm/string.hpp>
@ -31,19 +32,18 @@ namespace ErrorCodes
/// This is needed to avoid copy-pase. Because s3Cluster arguments only differ in additional argument (first) - cluster name
void TableFunctionS3::parseArgumentsImpl(
const String & error_message, ASTs & args, ContextPtr context, StorageS3::Configuration & s3_configuration, bool get_format_from_file)
void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context)
{
if (auto named_collection = tryGetNamedCollectionWithOverrides(args, context))
{
StorageS3::processNamedCollectionResult(s3_configuration, *named_collection);
StorageS3::processNamedCollectionResult(configuration, *named_collection);
}
else
{
if (args.empty() || args.size() > 6)
throw Exception::createDeprecated(error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The signature of table function {} shall be the following:\n{}", getName(), getSignature());
auto * header_it = StorageURL::collectHeaders(args, s3_configuration.headers_from_ast, context);
auto * header_it = StorageURL::collectHeaders(args, configuration.headers_from_ast, context);
if (header_it != args.end())
args.erase(header_it);
@ -130,46 +130,159 @@ void TableFunctionS3::parseArgumentsImpl(
}
/// This argument is always the first
s3_configuration.url = S3::URI(checkAndGetLiteralArgument<String>(args[0], "url"));
configuration.url = S3::URI(checkAndGetLiteralArgument<String>(args[0], "url"));
if (args_to_idx.contains("format"))
s3_configuration.format = checkAndGetLiteralArgument<String>(args[args_to_idx["format"]], "format");
configuration.format = checkAndGetLiteralArgument<String>(args[args_to_idx["format"]], "format");
if (args_to_idx.contains("structure"))
s3_configuration.structure = checkAndGetLiteralArgument<String>(args[args_to_idx["structure"]], "structure");
configuration.structure = checkAndGetLiteralArgument<String>(args[args_to_idx["structure"]], "structure");
if (args_to_idx.contains("compression_method"))
s3_configuration.compression_method = checkAndGetLiteralArgument<String>(args[args_to_idx["compression_method"]], "compression_method");
configuration.compression_method = checkAndGetLiteralArgument<String>(args[args_to_idx["compression_method"]], "compression_method");
if (args_to_idx.contains("access_key_id"))
s3_configuration.auth_settings.access_key_id = checkAndGetLiteralArgument<String>(args[args_to_idx["access_key_id"]], "access_key_id");
configuration.auth_settings.access_key_id = checkAndGetLiteralArgument<String>(args[args_to_idx["access_key_id"]], "access_key_id");
if (args_to_idx.contains("secret_access_key"))
s3_configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument<String>(args[args_to_idx["secret_access_key"]], "secret_access_key");
configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument<String>(args[args_to_idx["secret_access_key"]], "secret_access_key");
s3_configuration.auth_settings.no_sign_request = no_sign_request;
configuration.auth_settings.no_sign_request = no_sign_request;
}
s3_configuration.keys = {s3_configuration.url.key};
configuration.keys = {configuration.url.key};
/// For DataLake table functions, we should specify default format.
if (s3_configuration.format == "auto" && get_format_from_file)
s3_configuration.format = FormatFactory::instance().getFormatFromFileName(s3_configuration.url.uri.getPath(), true);
if (configuration.format == "auto")
configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.uri.getPath(), true);
}
void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
/// Clone ast function, because we can modify its arguments like removing headers.
auto ast_copy = ast_function->clone();
/// Parse args
ASTs & args_func = ast_function->children;
const auto message = fmt::format("The signature of table function '{}' could be the following:\n{}", getName(), signature);
if (args_func.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName());
auto & args = args_func.at(0)->children;
parseArgumentsImpl(message, args, context, configuration);
parseArgumentsImpl(args, context);
}
void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context)
{
if (tryGetNamedCollectionWithOverrides(args, context))
{
/// In case of named collection, just add key-value pair "structure='...'"
/// at the end of arguments to override existed structure.
ASTs equal_func_args = {std::make_shared<ASTIdentifier>("structure"), std::make_shared<ASTLiteral>(structure)};
auto equal_func = makeASTFunction("equals", std::move(equal_func_args));
args.push_back(equal_func);
}
else
{
/// If arguments contain headers, just remove it and add to the end of arguments later
/// (header argument can be at any position).
HTTPHeaderEntries tmp_headers;
auto * headers_it = StorageURL::collectHeaders(args, tmp_headers, context);
ASTPtr headers_ast;
if (headers_it != args.end())
{
headers_ast = *headers_it;
args.erase(headers_it);
}
if (args.empty() || args.size() > getMaxNumberOfArguments())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), args.size());
auto structure_literal = std::make_shared<ASTLiteral>(structure);
/// s3(s3_url)
if (args.size() == 1)
{
/// Add format=auto before structure argument.
args.push_back(std::make_shared<ASTLiteral>("auto"));
args.push_back(structure_literal);
}
/// s3(s3_url, format) or s3(s3_url, NOSIGN)
/// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not.
else if (args.size() == 2)
{
auto second_arg = checkAndGetLiteralArgument<String>(args[1], "format/NOSIGN");
/// If there is NOSIGN, add format=auto before structure.
if (boost::iequals(second_arg, "NOSIGN"))
args.push_back(std::make_shared<ASTLiteral>("auto"));
args.push_back(structure_literal);
}
/// s3(source, format, structure) or
/// s3(source, access_key_id, access_key_id) or
/// s3(source, NOSIGN, format)
/// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither.
else if (args.size() == 3)
{
auto second_arg = checkAndGetLiteralArgument<String>(args[1], "format/NOSIGN");
if (boost::iequals(second_arg, "NOSIGN"))
{
args.push_back(structure_literal);
}
else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg))
{
args.back() = structure_literal;
}
else
{
/// Add format=auto before structure argument.
args.push_back(std::make_shared<ASTLiteral>("auto"));
args.push_back(structure_literal);
}
}
/// s3(source, format, structure, compression_method) or
/// s3(source, access_key_id, access_key_id, format) or
/// s3(source, NOSIGN, format, structure)
/// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither.
else if (args.size() == 4)
{
auto second_arg = checkAndGetLiteralArgument<String>(args[1], "format/NOSIGN");
if (boost::iequals(second_arg, "NOSIGN"))
{
args.back() = structure_literal;
}
else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg))
{
args[args.size() - 2] = structure_literal;
}
else
{
args.push_back(structure_literal);
}
}
/// s3(source, access_key_id, access_key_id, format, structure) or
/// s3(source, NOSIGN, format, structure, compression_method)
/// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not.
else if (args.size() == 5)
{
auto sedond_arg = checkAndGetLiteralArgument<String>(args[1], "format/NOSIGN");
if (boost::iequals(sedond_arg, "NOSIGN"))
{
args[args.size() - 2] = structure_literal;
}
else
{
args.back() = structure_literal;
}
}
/// s3(source, access_key_id, access_key_id, format, structure, compression)
else if (args.size() == 6)
{
args[args.size() - 2] = structure_literal;
}
if (headers_ast)
args.push_back(headers_ast);
}
}
ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) const

View File

@ -13,7 +13,7 @@ namespace DB
class Context;
/* s3(source, [access_key_id, secret_access_key,] format, structure[, compression]) - creates a temporary storage for a file in S3.
/* s3(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in S3.
*/
class TableFunctionS3 : public ITableFunction
{
@ -26,11 +26,21 @@ public:
" - url, format, structure, compression_method\n"
" - url, access_key_id, secret_access_key, format\n"
" - url, access_key_id, secret_access_key, format, structure\n"
" - url, access_key_id, secret_access_key, format, structure, compression_method";
std::string getName() const override
" - url, access_key_id, secret_access_key, format, structure, compression_method\n"
"All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)";
static size_t getMaxNumberOfArguments() { return 6; }
String getName() const override
{
return name;
}
virtual String getSignature() const
{
return signature;
}
bool hasStaticStructure() const override { return configuration.structure != "auto"; }
bool needStructureHint() const override { return configuration.structure == "auto"; }
@ -44,12 +54,9 @@ public:
return {"_path", "_file"};
}
static void parseArgumentsImpl(
const String & error_message,
ASTs & args,
ContextPtr context,
StorageS3::Configuration & configuration,
bool get_format_from_file = true);
virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context);
static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context);
protected:

View File

@ -2,92 +2,19 @@
#if USE_AWS_S3
#include <Storages/StorageS3.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <DataTypes/DataTypeString.h>
#include <IO/S3Common.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/Context.h>
#include <Interpreters/ClientInfo.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/TableFunctionS3.h>
#include <TableFunctions/TableFunctionS3Cluster.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <Access/Common/AccessFlags.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/IAST_fwd.h>
#include <Storages/StorageS3.h>
#include "registerTableFunctions.h"
#include <memory>
#include <thread>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_GET;
}
void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
/// Parse args
ASTs & args_func = ast_function->children;
if (args_func.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName());
ASTs & args = args_func.at(0)->children;
constexpr auto fmt_string = "The signature of table function {} could be the following:\n"
" - cluster, url\n"
" - cluster, url, format\n"
" - cluster, url, format, structure\n"
" - cluster, url, access_key_id, secret_access_key\n"
" - cluster, url, format, structure, compression_method\n"
" - cluster, url, access_key_id, secret_access_key, format\n"
" - cluster, url, access_key_id, secret_access_key, format, structure\n"
" - cluster, url, access_key_id, secret_access_key, format, structure, compression_method";
auto message = PreformattedMessage{fmt::format(fmt_string, getName()), fmt_string};
if (args.size() < 2 || args.size() > 7)
throw Exception::createDeprecated(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
/// Evaluate only first argument, everything else will be done TableFunctionS3
args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context);
/// Cluster name is always the first
cluster_name = checkAndGetLiteralArgument<String>(args[0], "cluster_name");
if (!context->tryGetCluster(cluster_name))
throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name);
/// Just cut the first arg (cluster_name) and try to parse s3 table function arguments as is
ASTs clipped_args;
clipped_args.reserve(args.size() - 1);
std::copy(args.begin() + 1, args.end(), std::back_inserter(clipped_args));
/// StorageS3ClusterConfiguration inherints from StorageS3::Configuration, so it is safe to upcast it.
TableFunctionS3::parseArgumentsImpl(message.text, clipped_args, context, static_cast<StorageS3::Configuration &>(configuration));
}
ColumnsDescription TableFunctionS3Cluster::getActualTableStructure(ContextPtr context) const
{
context->checkAccess(getSourceAccessType());
configuration.update(context);
if (configuration.structure == "auto")
return StorageS3::getTableStructureFromData(configuration, std::nullopt, context);
return parseColumnsListFromString(configuration.structure, context);
}
StoragePtr TableFunctionS3Cluster::executeImpl(
const ASTPtr & /*function*/, ContextPtr context,
const std::string & table_name, ColumnsDescription /*cached_columns*/) const

View File

@ -6,6 +6,7 @@
#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/TableFunctionS3.h>
#include <TableFunctions/ITableFunctionCluster.h>
#include <Storages/StorageS3Cluster.h>
@ -22,20 +23,29 @@ class Context;
* On worker node it asks initiator about next task to process, processes it.
* This is repeated until the tasks are finished.
*/
class TableFunctionS3Cluster : public ITableFunction
class TableFunctionS3Cluster : public ITableFunctionCluster<TableFunctionS3>
{
public:
static constexpr auto name = "s3Cluster";
std::string getName() const override
static constexpr auto signature = " - cluster, url\n"
" - cluster, url, format\n"
" - cluster, url, format, structure\n"
" - cluster, url, access_key_id, secret_access_key\n"
" - cluster, url, format, structure, compression_method\n"
" - cluster, url, access_key_id, secret_access_key, format\n"
" - cluster, url, access_key_id, secret_access_key, format, structure\n"
" - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n"
"All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)";
String getName() const override
{
return name;
}
bool hasStaticStructure() const override { return configuration.structure != "auto"; }
bool needStructureHint() const override { return configuration.structure == "auto"; }
void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; }
String getSignature() const override
{
return signature;
}
protected:
StoragePtr executeImpl(
@ -45,15 +55,6 @@ protected:
ColumnsDescription cached_columns) const override;
const char * getStorageTypeName() const override { return "S3Cluster"; }
AccessType getSourceAccessType() const override { return AccessType::S3; }
ColumnsDescription getActualTableStructure(ContextPtr) const override;
void parseArguments(const ASTPtr &, ContextPtr) override;
String cluster_name;
mutable StorageS3::Configuration configuration;
ColumnsDescription structure_hint;
};
}

View File

@ -18,10 +18,6 @@
namespace DB
{
static const String bad_arguments_error_message = "Table function URL can have the following arguments: "
"url, name of used format (taken from file extension by default), "
"optional table structure, optional compression method, "
"optional headers (specified as `headers('name'='value', 'name2'='value2')`)";
namespace ErrorCodes
{
@ -48,15 +44,13 @@ std::vector<size_t> TableFunctionURL::skipAnalysisForArguments(const QueryTreeNo
void TableFunctionURL::parseArguments(const ASTPtr & ast, ContextPtr context)
{
const auto & ast_function = assert_cast<const ASTFunction *>(ast.get());
/// Clone ast function, because we can modify it's arguments like removing headers.
ITableFunctionFileLike::parseArguments(ast->clone(), context);
}
const auto & args = ast_function->children;
if (args.empty())
throw Exception::createDeprecated(bad_arguments_error_message, ErrorCodes::BAD_ARGUMENTS);
auto & url_function_args = assert_cast<ASTExpressionList *>(args[0].get())->children;
if (auto named_collection = tryGetNamedCollectionWithOverrides(url_function_args, context))
void TableFunctionURL::parseArgumentsImpl(ASTs & args, const ContextPtr & context)
{
if (auto named_collection = tryGetNamedCollectionWithOverrides(args, context))
{
StorageURL::processNamedCollectionResult(configuration, *named_collection);
@ -68,16 +62,46 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast, ContextPtr context)
if (format == "auto")
format = FormatFactory::instance().getFormatFromFileName(Poco::URI(filename).getPath(), true);
StorageURL::collectHeaders(url_function_args, configuration.headers, context);
StorageURL::collectHeaders(args, configuration.headers, context);
}
else
{
auto * headers_it = StorageURL::collectHeaders(url_function_args, configuration.headers, context);
auto * headers_it = StorageURL::collectHeaders(args, configuration.headers, context);
/// ITableFunctionFileLike cannot parse headers argument, so remove it.
if (headers_it != url_function_args.end())
url_function_args.erase(headers_it);
if (headers_it != args.end())
args.erase(headers_it);
ITableFunctionFileLike::parseArguments(ast, context);
ITableFunctionFileLike::parseArgumentsImpl(args, context);
}
}
void TableFunctionURL::addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context)
{
if (tryGetNamedCollectionWithOverrides(args, context))
{
/// In case of named collection, just add key-value pair "structure='...'"
/// at the end of arguments to override existed structure.
ASTs equal_func_args = {std::make_shared<ASTIdentifier>("structure"), std::make_shared<ASTLiteral>(desired_structure)};
auto equal_func = makeASTFunction("equals", std::move(equal_func_args));
args.push_back(equal_func);
}
else
{
/// If arguments contain headers, just remove it and add to the end of arguments later
/// (header argument can be at any position).
HTTPHeaderEntries tmp_headers;
auto * headers_it = StorageURL::collectHeaders(args, tmp_headers, context);
ASTPtr headers_ast;
if (headers_it != args.end())
{
headers_ast = *headers_it;
args.erase(headers_it);
}
ITableFunctionFileLike::addColumnsStructureToArguments(args, desired_structure, context);
if (headers_ast)
args.push_back(headers_ast);
}
}

View File

@ -10,21 +10,35 @@ namespace DB
class Context;
/* url(source, format[, structure, compression]) - creates a temporary storage from url.
/* url(source, [format, structure, compression]) - creates a temporary storage from url.
*/
class TableFunctionURL : public ITableFunctionFileLike
{
public:
static constexpr auto name = "url";
std::string getName() const override
static constexpr auto signature = " - uri\n"
" - uri, format\n"
" - uri, format, structure\n"
" - uri, format, structure, compression_method\n"
"All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)";
String getName() const override
{
return name;
}
String getSignature() const override
{
return signature;
}
ColumnsDescription getActualTableStructure(ContextPtr context) const override;
static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context);
protected:
void parseArguments(const ASTPtr & ast, ContextPtr context) override;
void parseArgumentsImpl(ASTs & args, const ContextPtr & context) override;
StorageURL::Configuration configuration;

View File

@ -1,83 +1,11 @@
#include <TableFunctions/TableFunctionURLCluster.h>
#include <TableFunctions/TableFunctionFactory.h>
#include "registerTableFunctions.h"
#include <Access/Common/AccessFlags.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/StorageExternalDistributed.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/TableFunctionURL.h>
#include <Interpreters/parseColumnsListForTableFunction.h>
#include <Interpreters/Context.h>
#include <Formats/FormatFactory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_GET;
}
void TableFunctionURLCluster::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
auto ast_copy = ast_function->clone();
/// Parse args
ASTs & args_func = ast_copy->children;
if (args_func.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function {} must have arguments", getName());
ASTs & args = args_func.at(0)->children;
if (args.size() < 2 || args.size() > 5)
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"The signature of table function {} shall be the following:\n"
" - cluster, uri\n"
" - cluster, uri, format\n"
" - cluster, uri, format, structure\n"
" - cluster, uri, format, structure, compression_method",
getName());
for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
/// This argument is always the first
cluster_name = checkAndGetLiteralArgument<String>(args[0], "cluster_name");
if (!context->tryGetCluster(cluster_name))
throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name);
/// Just cut the first arg (cluster_name) and try to parse other table function arguments as is
args.erase(args.begin());
TableFunctionURL::parseArguments(ast_copy, context);
}
ColumnsDescription TableFunctionURLCluster::getActualTableStructure(ContextPtr context) const
{
if (structure == "auto")
{
context->checkAccess(getSourceAccessType());
return StorageURL::getTableStructureFromData(format,
filename,
chooseCompressionMethod(Poco::URI(filename).getPath(), compression_method),
configuration.headers,
std::nullopt,
context);
}
return parseColumnsListFromString(structure, context);
}
StoragePtr TableFunctionURLCluster::getStorage(
const String & /*source*/, const String & /*format_*/, const ColumnsDescription & columns, ContextPtr context,
const std::string & table_name, const String & /*compression_method_*/) const
@ -85,7 +13,7 @@ StoragePtr TableFunctionURLCluster::getStorage(
StoragePtr storage;
if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY)
{
//On worker node this uri won't contains globs
//On worker node this uri won't contain globs
storage = std::make_shared<StorageURL>(
filename,
StorageID(getDatabaseName(), table_name),

View File

@ -2,6 +2,7 @@
#include <TableFunctions/ITableFunctionFileLike.h>
#include <TableFunctions/TableFunctionURL.h>
#include <TableFunctions/ITableFunctionCluster.h>
#include <Storages/StorageURL.h>
#include <Storages/StorageURLCluster.h>
#include <IO/ReadWriteBufferFromHTTP.h>
@ -20,29 +21,32 @@ class Context;
* On worker node it asks initiator about next task to process, processes it.
* This is repeated until the tasks are finished.
*/
class TableFunctionURLCluster : public TableFunctionURL
class TableFunctionURLCluster : public ITableFunctionCluster<TableFunctionURL>
{
public:
static constexpr auto name = "urlCluster";
std::string getName() const override
static constexpr auto signature = " - cluster, uri\n"
" - cluster, uri, format\n"
" - cluster, uri, format, structure\n"
" - cluster, uri, format, structure, compression_method\n"
"All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)";
String getName() const override
{
return name;
}
String getSignature() const override
{
return signature;
}
protected:
StoragePtr getStorage(
const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context,
const std::string & table_name, const String & compression_method_) const override;
const char * getStorageTypeName() const override { return "URLCluster"; }
AccessType getSourceAccessType() const override { return AccessType::URL; }
ColumnsDescription getActualTableStructure(ContextPtr) const override;
void parseArguments(const ASTPtr &, ContextPtr) override;
String cluster_name;
};
}

View File

@ -75,6 +75,21 @@ c3 Nullable(Int64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
c1 Nullable(Int64)
c2 Nullable(Int64)
c3 Nullable(Int64)
0 0 0
0 0 0
1 2 3

View File

@ -20,6 +20,12 @@ desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://local
desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto');
desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', 'auto');
desc urlCluster('test_cluster_one_shard_three_replicas_localhost', headers('X-ClickHouse-Database'='default'), 'http://localhost:11111/test/{a,b}.tsv');
desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', headers('X-ClickHouse-Database'='default'), 'TSV');
desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto', headers('X-ClickHouse-Database'='default'));
desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', headers('X-ClickHouse-Database'='default'));
desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', headers('X-ClickHouse-Database'='default'), 'auto', 'auto');
select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv') order by c1, c2, c3;
select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV') order by c1, c2, c3;
select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto') order by c1, c2, c3;

View File

@ -0,0 +1,16 @@
1 2 3
4 5 6
7 8 9
0 0 0
1 2 3
4 5 6
7 8 9
0 0 0
1 2 3
4 5 6
7 8 9
0 0 0
1 2 3
4 5 6
7 8 9
0 0 0

View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Tags: no-fasttest, no-s3-storage
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "create named collection $CLICKHOUSE_TEST_UNIQUE_NAME as url='http://localhost:11111/test/a.tsv'"
$CLICKHOUSE_CLIENT -q "select * from s3Cluster(test_cluster_one_shard_three_replicas_localhost, $CLICKHOUSE_TEST_UNIQUE_NAME)"
$CLICKHOUSE_CLIENT -q "select * from s3Cluster(test_cluster_one_shard_three_replicas_localhost, $CLICKHOUSE_TEST_UNIQUE_NAME, structure='auto')"
$CLICKHOUSE_CLIENT -q "select * from urlCluster(test_cluster_one_shard_three_replicas_localhost, $CLICKHOUSE_TEST_UNIQUE_NAME)"
$CLICKHOUSE_CLIENT -q "select * from urlCluster(test_cluster_one_shard_three_replicas_localhost, $CLICKHOUSE_TEST_UNIQUE_NAME, structure='auto')"
$CLICKHOUSE_CLIENT -q "drop named collection $CLICKHOUSE_TEST_UNIQUE_NAME"