2021-09-09 13:47:48 +00:00
|
|
|
#include "UserDefinedExecutableFunctionFactory.h"
|
|
|
|
|
2021-12-22 15:20:36 +00:00
|
|
|
#include <filesystem>
|
|
|
|
|
|
|
|
#include <Common/filesystemHelpers.h>
|
2022-05-31 20:27:49 +00:00
|
|
|
#include <Common/FieldVisitorToString.h>
|
|
|
|
#include <DataTypes/FieldToDataType.h>
|
2021-12-22 15:20:36 +00:00
|
|
|
|
2021-10-15 20:18:20 +00:00
|
|
|
#include <Processors/Sources/ShellCommandSource.h>
|
2021-11-01 11:22:21 +00:00
|
|
|
#include <Processors/Sources/SourceFromSingleChunk.h>
|
2021-10-15 20:18:20 +00:00
|
|
|
#include <Formats/formatBlock.h>
|
2021-09-17 13:04:44 +00:00
|
|
|
|
2021-09-09 13:47:48 +00:00
|
|
|
#include <Functions/FunctionFactory.h>
|
2021-09-17 13:04:44 +00:00
|
|
|
#include <Functions/FunctionHelpers.h>
|
2021-09-09 13:47:48 +00:00
|
|
|
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
2022-05-31 20:27:49 +00:00
|
|
|
#include <Interpreters/convertFieldToType.h>
|
2021-09-17 13:04:44 +00:00
|
|
|
#include <Interpreters/ExternalUserDefinedExecutableFunctionsLoader.h>
|
|
|
|
#include <Interpreters/Context.h>
|
2021-10-02 20:29:09 +00:00
|
|
|
#include <Interpreters/castColumn.h>
|
2021-09-09 13:47:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2021-09-17 13:04:44 +00:00
|
|
|
extern const int UNSUPPORTED_METHOD;
|
2022-05-31 20:27:49 +00:00
|
|
|
extern const int BAD_ARGUMENTS;
|
2021-09-09 13:47:48 +00:00
|
|
|
}
|
|
|
|
|
2021-09-17 13:04:44 +00:00
|
|
|
class UserDefinedFunction final : public IFunction
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
|
|
|
|
explicit UserDefinedFunction(
|
|
|
|
ExternalUserDefinedExecutableFunctionsLoader::UserDefinedExecutableFunctionPtr executable_function_,
|
2022-05-31 20:27:49 +00:00
|
|
|
ContextPtr context_,
|
|
|
|
Array parameters_)
|
2021-09-17 13:04:44 +00:00
|
|
|
: executable_function(std::move(executable_function_))
|
|
|
|
, context(context_)
|
|
|
|
{
|
2022-05-31 20:27:49 +00:00
|
|
|
const auto & configuration = executable_function->getConfiguration();
|
|
|
|
size_t command_parameters_size = configuration.parameters.size();
|
2022-06-01 10:52:54 +00:00
|
|
|
if (command_parameters_size != parameters_.size())
|
2022-05-31 20:27:49 +00:00
|
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
|
|
"Executable user defined function {} number of parameters does not match. Expected {}. Actual {}",
|
|
|
|
configuration.name,
|
|
|
|
command_parameters_size,
|
|
|
|
parameters_.size());
|
|
|
|
|
|
|
|
command_with_parameters = configuration.command;
|
|
|
|
command_arguments_with_parameters = configuration.command_arguments;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < command_parameters_size; ++i)
|
|
|
|
{
|
|
|
|
const auto & command_parameter = configuration.parameters[i];
|
|
|
|
const auto & parameter_value = parameters_[i];
|
|
|
|
auto converted_parameter = convertFieldToTypeOrThrow(parameter_value, *command_parameter.type);
|
|
|
|
auto parameter_placeholder = "{" + command_parameter.name + "}";
|
|
|
|
|
|
|
|
auto parameter_value_string = applyVisitor(FieldVisitorToString(), converted_parameter);
|
|
|
|
bool find_placedholder = false;
|
|
|
|
|
2022-06-01 10:52:54 +00:00
|
|
|
auto try_replace_parameter_placeholder_with_value = [&](std::string & command_part)
|
|
|
|
{
|
|
|
|
size_t previous_parameter_placeholder_position = 0;
|
2022-05-31 20:27:49 +00:00
|
|
|
|
2022-06-01 10:52:54 +00:00
|
|
|
while (true)
|
|
|
|
{
|
|
|
|
auto parameter_placeholder_position = command_part.find(parameter_placeholder, previous_parameter_placeholder_position);
|
|
|
|
if (parameter_placeholder_position == std::string::npos)
|
|
|
|
break;
|
2022-05-31 20:27:49 +00:00
|
|
|
|
2022-06-01 10:52:54 +00:00
|
|
|
size_t parameter_placeholder_size = parameter_placeholder.size();
|
|
|
|
command_part.replace(parameter_placeholder_position, parameter_placeholder_size, parameter_value_string);
|
|
|
|
previous_parameter_placeholder_position = parameter_placeholder_position + parameter_value_string.size();
|
|
|
|
find_placedholder = true;
|
|
|
|
}
|
2022-05-31 20:27:49 +00:00
|
|
|
|
|
|
|
find_placedholder = true;
|
2022-06-01 10:52:54 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
for (auto & command_argument : command_arguments_with_parameters)
|
|
|
|
try_replace_parameter_placeholder_with_value(command_argument);
|
|
|
|
|
|
|
|
try_replace_parameter_placeholder_with_value(command_with_parameters);
|
2022-05-31 20:27:49 +00:00
|
|
|
|
|
|
|
if (!find_placedholder)
|
|
|
|
{
|
|
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
|
|
|
"Executable user defined function {} no placeholder for parameter {}",
|
|
|
|
configuration.name,
|
|
|
|
command_parameter.name);
|
|
|
|
}
|
|
|
|
}
|
2021-09-17 13:04:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
String getName() const override { return executable_function->getConfiguration().name; }
|
|
|
|
|
|
|
|
bool isVariadic() const override { return false; }
|
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
2022-02-16 15:31:30 +00:00
|
|
|
size_t getNumberOfArguments() const override { return executable_function->getConfiguration().arguments.size(); }
|
2021-09-17 13:04:44 +00:00
|
|
|
|
|
|
|
bool useDefaultImplementationForConstants() const override { return true; }
|
2022-05-31 16:46:33 +00:00
|
|
|
bool useDefaultImplementationForNulls() const override { return false; }
|
2021-09-17 13:04:44 +00:00
|
|
|
bool isDeterministic() const override { return false; }
|
2022-03-03 12:40:42 +00:00
|
|
|
bool isDeterministicInScopeOfQuery() const override { return false; }
|
2021-09-17 13:04:44 +00:00
|
|
|
|
2021-10-02 20:29:09 +00:00
|
|
|
DataTypePtr getReturnTypeImpl(const DataTypes &) const override
|
2021-09-17 13:04:44 +00:00
|
|
|
{
|
|
|
|
const auto & configuration = executable_function->getConfiguration();
|
|
|
|
return configuration.result_type;
|
|
|
|
}
|
|
|
|
|
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
|
|
|
{
|
2022-04-04 13:56:01 +00:00
|
|
|
/// Do not start user defined script during query analysis. Because user script startup could be heavy.
|
|
|
|
if (input_rows_count == 0)
|
|
|
|
return result_type->createColumn();
|
|
|
|
|
2021-12-24 10:39:48 +00:00
|
|
|
auto coordinator = executable_function->getCoordinator();
|
|
|
|
const auto & coordinator_configuration = coordinator->getConfiguration();
|
2021-10-02 20:29:09 +00:00
|
|
|
const auto & configuration = executable_function->getConfiguration();
|
2021-12-22 15:20:36 +00:00
|
|
|
|
2022-05-31 20:27:49 +00:00
|
|
|
String command = command_with_parameters;
|
2021-12-22 15:20:36 +00:00
|
|
|
|
2021-12-24 10:39:48 +00:00
|
|
|
if (coordinator_configuration.execute_direct)
|
|
|
|
{
|
|
|
|
auto user_scripts_path = context->getUserScriptsPath();
|
|
|
|
auto script_path = user_scripts_path + '/' + command;
|
|
|
|
|
|
|
|
if (!fileOrSymlinkPathStartsWith(script_path, user_scripts_path))
|
|
|
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
|
|
|
"Executable file {} must be inside user scripts folder {}",
|
|
|
|
command,
|
|
|
|
user_scripts_path);
|
|
|
|
|
2022-04-04 12:23:34 +00:00
|
|
|
if (!FS::exists(script_path))
|
2021-12-24 10:39:48 +00:00
|
|
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
|
|
|
"Executable file {} does not exist inside user scripts folder {}",
|
|
|
|
command,
|
|
|
|
user_scripts_path);
|
|
|
|
|
2022-04-04 12:23:34 +00:00
|
|
|
if (!FS::canExecute(script_path))
|
|
|
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
|
|
|
"Executable file {} is not executable inside user scripts folder {}",
|
|
|
|
command,
|
|
|
|
user_scripts_path);
|
|
|
|
|
2021-12-24 10:39:48 +00:00
|
|
|
command = std::move(script_path);
|
|
|
|
}
|
2021-12-22 15:20:36 +00:00
|
|
|
|
2021-12-24 10:39:48 +00:00
|
|
|
size_t argument_size = arguments.size();
|
2021-10-02 20:29:09 +00:00
|
|
|
auto arguments_copy = arguments;
|
|
|
|
|
2021-12-24 10:39:48 +00:00
|
|
|
for (size_t i = 0; i < argument_size; ++i)
|
2021-10-02 20:29:09 +00:00
|
|
|
{
|
|
|
|
auto & column_with_type = arguments_copy[i];
|
|
|
|
column_with_type.column = column_with_type.column->convertToFullColumnIfConst();
|
|
|
|
|
2022-02-16 15:31:30 +00:00
|
|
|
const auto & argument = configuration.arguments[i];
|
2022-02-18 15:21:11 +00:00
|
|
|
column_with_type.name = argument.name;
|
2022-02-16 15:31:30 +00:00
|
|
|
|
|
|
|
const auto & argument_type = argument.type;
|
|
|
|
|
2021-10-02 20:29:09 +00:00
|
|
|
if (areTypesEqual(arguments_copy[i].type, argument_type))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ColumnWithTypeAndName column_to_cast = {column_with_type.column, column_with_type.type, column_with_type.name};
|
|
|
|
column_with_type.column = castColumnAccurate(column_to_cast, argument_type);
|
|
|
|
column_with_type.type = argument_type;
|
|
|
|
|
2022-01-02 13:21:52 +00:00
|
|
|
column_with_type = std::move(column_to_cast);
|
2021-10-02 20:29:09 +00:00
|
|
|
}
|
|
|
|
|
2022-02-18 15:21:11 +00:00
|
|
|
ColumnWithTypeAndName result(result_type, configuration.result_name);
|
2021-09-17 13:04:44 +00:00
|
|
|
Block result_block({result});
|
|
|
|
|
2021-10-02 20:29:09 +00:00
|
|
|
Block arguments_block(arguments_copy);
|
2021-11-01 11:22:21 +00:00
|
|
|
auto source = std::make_shared<SourceFromSingleChunk>(std::move(arguments_block));
|
|
|
|
auto shell_input_pipe = Pipe(std::move(source));
|
2021-09-17 13:04:44 +00:00
|
|
|
|
|
|
|
ShellCommandSourceConfiguration shell_command_source_configuration;
|
|
|
|
|
2022-01-02 13:21:52 +00:00
|
|
|
if (coordinator_configuration.is_executable_pool)
|
2021-09-17 13:04:44 +00:00
|
|
|
{
|
|
|
|
shell_command_source_configuration.read_fixed_number_of_rows = true;
|
|
|
|
shell_command_source_configuration.number_of_rows_to_read = input_rows_count;
|
|
|
|
}
|
|
|
|
|
2021-11-01 11:22:21 +00:00
|
|
|
Pipes shell_input_pipes;
|
|
|
|
shell_input_pipes.emplace_back(std::move(shell_input_pipe));
|
|
|
|
|
|
|
|
Pipe pipe = coordinator->createPipe(
|
2021-12-24 10:39:48 +00:00
|
|
|
command,
|
2022-05-31 20:27:49 +00:00
|
|
|
command_arguments_with_parameters,
|
2021-11-01 11:22:21 +00:00
|
|
|
std::move(shell_input_pipes),
|
|
|
|
result_block,
|
2021-09-17 13:04:44 +00:00
|
|
|
context,
|
2021-11-01 11:22:21 +00:00
|
|
|
shell_command_source_configuration);
|
2021-09-17 13:04:44 +00:00
|
|
|
|
2021-09-20 17:54:01 +00:00
|
|
|
QueryPipeline pipeline(std::move(pipe));
|
2021-09-17 13:04:44 +00:00
|
|
|
PullingPipelineExecutor executor(pipeline);
|
|
|
|
|
|
|
|
auto result_column = result_type->createColumn();
|
|
|
|
result_column->reserve(input_rows_count);
|
|
|
|
|
|
|
|
Block block;
|
|
|
|
while (executor.pull(block))
|
|
|
|
{
|
|
|
|
const auto & result_column_to_add = *block.safeGetByPosition(0).column;
|
|
|
|
result_column->insertRangeFrom(result_column_to_add, 0, result_column_to_add.size());
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t result_column_size = result_column->size();
|
|
|
|
if (result_column_size != input_rows_count)
|
|
|
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
2022-01-02 13:21:52 +00:00
|
|
|
"Function {}: wrong result, expected {} row(s), actual {}",
|
|
|
|
quoteString(getName()),
|
2021-09-17 13:04:44 +00:00
|
|
|
input_rows_count,
|
|
|
|
result_column_size);
|
|
|
|
|
|
|
|
return result_column;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
ExternalUserDefinedExecutableFunctionsLoader::UserDefinedExecutableFunctionPtr executable_function;
|
|
|
|
ContextPtr context;
|
2022-05-31 20:27:49 +00:00
|
|
|
String command_with_parameters;
|
|
|
|
std::vector<std::string> command_arguments_with_parameters;
|
2021-09-17 13:04:44 +00:00
|
|
|
};
|
|
|
|
|
2021-09-09 13:47:48 +00:00
|
|
|
/// Returns the single factory object; it is a function-local static created on first call.
UserDefinedExecutableFunctionFactory & UserDefinedExecutableFunctionFactory::instance()
{
    static UserDefinedExecutableFunctionFactory factory;
    return factory;
}
|
|
|
|
|
2022-05-31 20:27:49 +00:00
|
|
|
/// Loads the executable user defined function `function_name` through the loader of `context`
/// and wraps it, bound to `parameters`, into an overload resolver.
FunctionOverloadResolverPtr UserDefinedExecutableFunctionFactory::get(const String & function_name, ContextPtr context, Array parameters)
{
    const auto & functions_loader = context->getExternalUserDefinedExecutableFunctionsLoader();
    auto loaded_function = std::static_pointer_cast<const UserDefinedExecutableFunction>(functions_loader.load(function_name));

    /// The loader reference is not used past this point, so the context can be handed over to the function.
    auto udf = std::make_shared<UserDefinedFunction>(std::move(loaded_function), std::move(context), std::move(parameters));

    return std::make_unique<FunctionToOverloadResolverAdaptor>(std::move(udf));
}
|
|
|
|
|
2022-05-31 20:27:49 +00:00
|
|
|
/// Same as get, but uses the loader's current load result for `function_name`
/// and returns nullptr when that result holds no object.
FunctionOverloadResolverPtr UserDefinedExecutableFunctionFactory::tryGet(const String & function_name, ContextPtr context, Array parameters)
{
    const auto & functions_loader = context->getExternalUserDefinedExecutableFunctionsLoader();
    auto current_load_result = functions_loader.getLoadResult(function_name);

    if (!current_load_result.object)
        return nullptr;

    auto loaded_function = std::static_pointer_cast<const UserDefinedExecutableFunction>(current_load_result.object);
    auto udf = std::make_shared<UserDefinedFunction>(std::move(loaded_function), std::move(context), std::move(parameters));

    return std::make_unique<FunctionToOverloadResolverAdaptor>(std::move(udf));
}
|
|
|
|
|
2021-10-27 15:49:18 +00:00
|
|
|
bool UserDefinedExecutableFunctionFactory::has(const String & function_name, ContextPtr context)
|
|
|
|
{
|
|
|
|
const auto & loader = context->getExternalUserDefinedExecutableFunctionsLoader();
|
|
|
|
auto load_result = loader.getLoadResult(function_name);
|
|
|
|
|
|
|
|
bool result = load_result.object != nullptr;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-09-17 13:04:44 +00:00
|
|
|
/// Collects the loadable names of all objects currently loaded by the loader of `context`.
std::vector<String> UserDefinedExecutableFunctionFactory::getRegisteredNames(ContextPtr context)
{
    const auto & functions_loader = context->getExternalUserDefinedExecutableFunctionsLoader();
    auto objects = functions_loader.getLoadedObjects();

    std::vector<std::string> names;
    names.reserve(objects.size());

    for (auto & object : objects)
    {
        names.emplace_back(object->getLoadableName());
    }

    return names;
}
|
|
|
|
|
|
|
|
}
|