2022-09-28 08:45:15 +00:00
|
|
|
#include "config.h"
|
2021-03-16 18:41:29 +00:00
|
|
|
|
|
|
|
#if USE_AWS_S3
|
|
|
|
|
2021-04-12 21:42:52 +00:00
|
|
|
#include <Storages/StorageS3Cluster.h>
|
2022-06-23 20:04:06 +00:00
|
|
|
#include <Storages/StorageS3.h>
|
|
|
|
#include <Storages/checkAndGetLiteralArgument.h>
|
2021-04-06 19:18:45 +00:00
|
|
|
|
|
|
|
#include <DataTypes/DataTypeString.h>
|
2021-03-16 18:41:29 +00:00
|
|
|
#include <IO/S3Common.h>
|
|
|
|
#include <Interpreters/evaluateConstantExpression.h>
|
|
|
|
#include <Interpreters/Context.h>
|
2021-04-12 17:07:01 +00:00
|
|
|
#include <Interpreters/ClientInfo.h>
|
2021-03-16 18:41:29 +00:00
|
|
|
#include <TableFunctions/TableFunctionFactory.h>
|
|
|
|
#include <TableFunctions/TableFunctionS3.h>
|
2021-04-12 21:42:52 +00:00
|
|
|
#include <TableFunctions/TableFunctionS3Cluster.h>
|
2022-08-16 09:41:32 +00:00
|
|
|
#include <Interpreters/parseColumnsListForTableFunction.h>
|
2022-09-04 16:58:39 +00:00
|
|
|
#include <Access/Common/AccessFlags.h>
|
2021-03-16 18:41:29 +00:00
|
|
|
#include <Parsers/ASTLiteral.h>
|
2021-04-12 17:07:01 +00:00
|
|
|
#include <Parsers/ASTExpressionList.h>
|
|
|
|
#include <Parsers/IAST_fwd.h>
|
2021-04-12 17:33:55 +00:00
|
|
|
|
|
|
|
#include "registerTableFunctions.h"
|
2021-04-12 17:07:01 +00:00
|
|
|
|
|
|
|
#include <memory>
|
|
|
|
#include <thread>
|
2021-03-16 18:41:29 +00:00
|
|
|
|
2021-04-22 01:25:40 +00:00
|
|
|
|
2021-03-16 18:41:29 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes raised from this file. They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int BAD_GET;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
|
|
|
|
|
2021-04-06 19:18:45 +00:00
|
|
|
|
2021-04-12 21:42:52 +00:00
|
|
|
void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, ContextPtr context)
|
2021-03-16 18:41:29 +00:00
|
|
|
{
|
|
|
|
/// Parse args
|
|
|
|
ASTs & args_func = ast_function->children;
|
|
|
|
|
|
|
|
if (args_func.size() != 1)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName());
|
2021-03-16 18:41:29 +00:00
|
|
|
|
|
|
|
ASTs & args = args_func.at(0)->children;
|
|
|
|
|
2022-03-28 22:46:35 +00:00
|
|
|
for (auto & arg : args)
|
|
|
|
arg = evaluateConstantExpressionAsLiteral(arg, context);
|
|
|
|
|
2023-01-23 21:13:58 +00:00
|
|
|
constexpr auto fmt_string = "The signature of table function {} could be the following:\n"
|
|
|
|
" - cluster, url\n"
|
|
|
|
" - cluster, url, format\n"
|
|
|
|
" - cluster, url, format, structure\n"
|
|
|
|
" - cluster, url, access_key_id, secret_access_key\n"
|
|
|
|
" - cluster, url, format, structure, compression_method\n"
|
|
|
|
" - cluster, url, access_key_id, secret_access_key, format\n"
|
|
|
|
" - cluster, url, access_key_id, secret_access_key, format, structure\n"
|
|
|
|
" - cluster, url, access_key_id, secret_access_key, format, structure, compression_method";
|
|
|
|
auto message = PreformattedMessage{fmt::format(fmt_string, getName()), fmt_string};
|
2022-03-28 22:46:35 +00:00
|
|
|
if (args.size() < 2 || args.size() > 7)
|
2021-04-06 19:18:45 +00:00
|
|
|
throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
2021-03-16 18:41:29 +00:00
|
|
|
|
2021-04-13 19:57:01 +00:00
|
|
|
/// This arguments are always the first
|
2022-06-23 20:04:06 +00:00
|
|
|
configuration.cluster_name = checkAndGetLiteralArgument<String>(args[0], "cluster_name");
|
2021-04-13 19:57:01 +00:00
|
|
|
|
2022-03-28 22:46:35 +00:00
|
|
|
if (!context->tryGetCluster(configuration.cluster_name))
|
|
|
|
throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", configuration.cluster_name);
|
2021-04-13 19:57:01 +00:00
|
|
|
|
2022-03-28 22:46:35 +00:00
|
|
|
/// Just cut the first arg (cluster_name) and try to parse s3 table function arguments as is
|
|
|
|
ASTs clipped_args;
|
|
|
|
clipped_args.reserve(args.size());
|
|
|
|
std::copy(args.begin() + 1, args.end(), std::back_inserter(clipped_args));
|
2021-04-13 19:57:01 +00:00
|
|
|
|
2022-03-28 22:46:35 +00:00
|
|
|
/// StorageS3ClusterConfiguration inherints from StorageS3Configuration, so it is safe to upcast it.
|
2023-01-23 21:13:58 +00:00
|
|
|
TableFunctionS3::parseArgumentsImpl(message.message, clipped_args, context, static_cast<StorageS3Configuration & >(configuration));
|
2021-03-16 18:41:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-04-12 21:42:52 +00:00
|
|
|
/// Returns the column description of the table produced by this table function.
///
/// Access for the source is checked first. When the configured structure is the literal "auto",
/// the description is obtained via StorageS3::getTableStructureFromData; otherwise the
/// structure string is parsed as a columns list.
ColumnsDescription TableFunctionS3Cluster::getActualTableStructure(ContextPtr context) const
{
    context->checkAccess(getSourceAccessType());

    const bool structure_is_auto = (configuration.structure == "auto");
    if (!structure_is_auto)
        return parseColumnsListFromString(configuration.structure, context);

    return StorageS3::getTableStructureFromData(configuration, false, std::nullopt, context);
}
|
|
|
|
|
2021-04-12 21:42:52 +00:00
|
|
|
/// Creates the storage object behind the table function, starts it up and returns it.
///
/// Columns come from the explicit structure argument when it is not "auto"; otherwise from
/// `structure_hint` when that is non-empty; otherwise they are left default-constructed.
/// For a query whose kind is SECONDARY_QUERY a StorageS3 is created with distributed processing
/// enabled; for any other query kind a StorageS3Cluster is created.
StoragePtr TableFunctionS3Cluster::executeImpl(
    const ASTPtr & /*function*/, ContextPtr context,
    const std::string & table_name, ColumnsDescription /*cached_columns*/) const
{
    const bool structure_argument_was_provided = configuration.structure != "auto";

    ColumnsDescription columns;
    if (structure_argument_was_provided)
        columns = parseColumnsListFromString(configuration.structure, context);
    else if (!structure_hint.empty())
        columns = structure_hint;

    const bool is_secondary_query
        = context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY;

    StoragePtr storage;
    if (!is_secondary_query)
    {
        storage = std::make_shared<StorageS3Cluster>(
            configuration,
            StorageID(getDatabaseName(), table_name),
            columns,
            ConstraintsDescription{},
            context,
            structure_argument_was_provided);
    }
    else
    {
        /// On a worker node the filename is not expected to contain globs.
        storage = std::make_shared<StorageS3>(
            configuration,
            StorageID(getDatabaseName(), table_name),
            columns,
            ConstraintsDescription{},
            /* comment */ String{},
            context,
            /* format_settings */ std::nullopt, /// S3Cluster passes no format_settings
            /* distributed_processing */ true);
    }

    storage->startup();
    return storage;
}
|
|
|
|
|
|
|
|
|
2021-04-12 21:42:52 +00:00
|
|
|
/// Registers TableFunctionS3Cluster in the given table function factory.
void registerTableFunctionS3Cluster(TableFunctionFactory & factory)
{
    factory.registerFunction<TableFunctionS3Cluster>();
}
|
|
|
|
|
2021-04-06 19:18:45 +00:00
|
|
|
|
2021-03-16 18:41:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|