Executable user defined functions extract parameters name and type from command value

This commit is contained in:
Maksim Kita 2022-06-01 12:52:54 +02:00
parent c4da2540e9
commit d14193b3ad
4 changed files with 100 additions and 39 deletions

View File

@ -960,7 +960,8 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (UserDefinedExecutableFunctionFactory::instance().has(node.name, current_context))
{
Array parameters;
if (node.parameters) {
if (node.parameters)
{
auto & node_parameters = node.parameters->children;
size_t parameters_size = node_parameters.size();
parameters.resize(parameters_size);

View File

@ -1,6 +1,7 @@
#include "ExternalUserDefinedExecutableFunctionsLoader.h"
#include <boost/algorithm/string/split.hpp>
#include <Common/StringUtils/StringUtils.h>
#include <DataTypes/DataTypeFactory.h>
@ -18,6 +19,79 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int FUNCTION_ALREADY_EXISTS;
extern const int UNSUPPORTED_METHOD;
extern const int TYPE_MISMATCH;
}
namespace
{

/** Extract parameters from command and replace them with parameter names placeholders.
  * Example: test_script.py {parameter_name: UInt64}
  * After run function: test_script.py {parameter_name}
  *
  * Only `{name : Type}` fragments are rewritten, where `name` is a non-empty identifier
  * (word characters only) and `Type` is non-empty after trimming. Any other `{...}` fragment
  * is left in the command unchanged.
  *
  * @param command_value Command text; modified in place so that every recognized
  *                      `{name : Type}` becomes `{name}`.
  * @return Parameters in order of first appearance. A name that is repeated with the same type
  *         is returned only once.
  * @throws Exception with TYPE_MISMATCH if the same name is declared with different types.
  *         The type name is resolved with DataTypeFactory::get, which presumably throws
  *         for an unknown type name (not visible here — confirm).
  */
std::vector<UserDefinedExecutableFunctionParameter> extractParametersFromCommand(String & command_value)
{
    std::vector<UserDefinedExecutableFunctionParameter> parameters;

    /// Keys are owned strings on purpose. A std::string_view into an element of `parameters`
    /// would dangle when the vector reallocates, because short names are stored inline in the string object.
    std::unordered_map<std::string, DataTypePtr> parameter_name_to_type;

    size_t search_start_pos = 0;

    while (true)
    {
        const auto open_brace_pos = command_value.find('{', search_start_pos);
        if (open_brace_pos == std::string::npos)
            break;

        const auto close_brace_pos = command_value.find('}', open_brace_pos);
        if (close_brace_pos == std::string::npos)
            break;

        /// By default continue the scan right after this opening brace,
        /// so a rejected fragment does not hide a later valid one.
        search_start_pos = open_brace_pos + 1;

        const auto colon_pos = command_value.find(':', open_brace_pos);
        if (colon_pos == std::string::npos)
            break;

        /// The colon belongs to some later text, this brace pair has no type annotation.
        if (colon_pos > close_brace_pos)
            continue;

        std::string parameter_name = command_value.substr(open_brace_pos + 1, colon_pos - open_brace_pos - 1);
        trim(parameter_name);

        const bool is_identifier = std::all_of(parameter_name.begin(), parameter_name.end(), [](char character)
        {
            return isWordCharASCII(character);
        });

        /// std::all_of is true for an empty range, so emptiness must be checked separately.
        /// Either condition alone disqualifies the fragment.
        if (parameter_name.empty() || !is_identifier)
            continue;

        std::string data_type_name = command_value.substr(colon_pos + 1, close_brace_pos - colon_pos - 1);
        trim(data_type_name);

        if (data_type_name.empty())
            continue;

        DataTypePtr parameter_data_type = DataTypeFactory::instance().get(data_type_name);

        const auto known_parameter_it = parameter_name_to_type.find(parameter_name);
        const bool is_known_parameter = known_parameter_it != parameter_name_to_type.end();

        if (is_known_parameter && !parameter_data_type->equals(*known_parameter_it->second))
            throw Exception(ErrorCodes::TYPE_MISMATCH,
                "Multiple parameters with same name {} does not have same type. Expected {}. Actual {}",
                parameter_name,
                known_parameter_it->second->getName(),
                parameter_data_type->getName());

        /// Replace everything between the braces with the bare parameter name.
        const size_t replace_size = close_brace_pos - open_brace_pos - 1;
        command_value.replace(open_brace_pos + 1, replace_size, parameter_name);

        /// Resume after the closing brace of the rewritten `{name}` placeholder.
        /// This must be computed before parameter_name is moved from below.
        search_start_pos = open_brace_pos + parameter_name.size() + 2;

        if (!is_known_parameter)
        {
            parameter_name_to_type.emplace(parameter_name, parameter_data_type);
            parameters.emplace_back(UserDefinedExecutableFunctionParameter{std::move(parameter_name), std::move(parameter_data_type)});
        }
    }

    return parameters;
}

}
ExternalUserDefinedExecutableFunctionsLoader::ExternalUserDefinedExecutableFunctionsLoader(ContextPtr global_context_)
@ -72,6 +146,8 @@ ExternalLoader::LoadablePtr ExternalUserDefinedExecutableFunctionsLoader::create
bool execute_direct = config.getBool(key_in_config + ".execute_direct", true);
String command_value = config.getString(key_in_config + ".command");
std::vector<UserDefinedExecutableFunctionParameter> parameters = extractParametersFromCommand(command_value);
std::vector<String> command_arguments;
if (execute_direct)
@ -112,7 +188,6 @@ ExternalLoader::LoadablePtr ExternalUserDefinedExecutableFunctionsLoader::create
lifetime = ExternalLoadableLifetime(config, key_in_config + ".lifetime");
std::vector<UserDefinedExecutableFunctionArgument> arguments;
std::vector<UserDefinedExecutableFunctionParameter> parameters;
Poco::Util::AbstractConfiguration::Keys config_elems;
config.keys(key_in_config, config_elems);
@ -139,25 +214,9 @@ ExternalLoader::LoadablePtr ExternalUserDefinedExecutableFunctionsLoader::create
arguments.emplace_back(std::move(argument));
}
for (const auto & config_elem : config_elems)
{
if (!startsWith(config_elem, "parameter"))
continue;
UserDefinedExecutableFunctionParameter parameter;
const auto parameter_prefix = key_in_config + '.' + config_elem + '.';
parameter.type = DataTypeFactory::instance().get(config.getString(parameter_prefix + "type"));
parameter.name = config.getString(parameter_prefix + "name");
parameters.emplace_back(std::move(parameter));
}
if (is_executable_pool && !parameters.empty()) {
if (is_executable_pool && !parameters.empty())
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Executable user defined functions with `executable_pool` type does not support parameters");
}
UserDefinedExecutableFunctionConfiguration function_configuration
{

View File

@ -26,7 +26,6 @@ namespace ErrorCodes
{
extern const int UNSUPPORTED_METHOD;
extern const int BAD_ARGUMENTS;
extern const int TYPE_MISMATCH;
}
class UserDefinedFunction final : public IFunction
@ -42,13 +41,12 @@ public:
{
const auto & configuration = executable_function->getConfiguration();
size_t command_parameters_size = configuration.parameters.size();
if (command_parameters_size != parameters_.size()) {
if (command_parameters_size != parameters_.size())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Executable user defined function {} number of parameters does not match. Expected {}. Actual {}",
configuration.name,
command_parameters_size,
parameters_.size());
}
command_with_parameters = configuration.command;
command_arguments_with_parameters = configuration.command_arguments;
@ -59,26 +57,33 @@ public:
const auto & parameter_value = parameters_[i];
auto converted_parameter = convertFieldToTypeOrThrow(parameter_value, *command_parameter.type);
auto parameter_placeholder = "{" + command_parameter.name + "}";
size_t parameter_placeholder_size = parameter_placeholder.size();
auto parameter_value_string = applyVisitor(FieldVisitorToString(), converted_parameter);
bool find_placedholder = false;
for (auto & command_argument : command_arguments_with_parameters) {
auto parameter_placeholder_position = command_argument.find(parameter_placeholder);
if (parameter_placeholder_position == std::string::npos)
continue;
auto try_replace_parameter_placeholder_with_value = [&](std::string & command_part)
{
size_t previous_parameter_placeholder_position = 0;
while (true)
{
auto parameter_placeholder_position = command_part.find(parameter_placeholder, previous_parameter_placeholder_position);
if (parameter_placeholder_position == std::string::npos)
break;
size_t parameter_placeholder_size = parameter_placeholder.size();
command_part.replace(parameter_placeholder_position, parameter_placeholder_size, parameter_value_string);
previous_parameter_placeholder_position = parameter_placeholder_position + parameter_value_string.size();
find_placedholder = true;
}
command_argument.replace(parameter_placeholder_position, parameter_placeholder_size, parameter_value_string);
find_placedholder = true;
}
};
auto parameter_placeholder_position = command_with_parameters.find(parameter_placeholder);
for (auto & command_argument : command_arguments_with_parameters)
try_replace_parameter_placeholder_with_value(command_argument);
if (parameter_placeholder_position != std::string::npos) {
command_with_parameters.replace(parameter_placeholder_position, parameter_placeholder_size, parameter_value_string);
find_placedholder = true;
}
try_replace_parameter_placeholder_with_value(command_with_parameters);
if (!find_placedholder)
{

View File

@ -23,12 +23,8 @@
<argument>
<type>UInt64</type>
</argument>
<parameter>
<name>test_parameter</name>
<type>UInt64</type>
</parameter>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y + {test_parameter} FROM table"</command>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y + {test_parameter : UInt64} FROM table"</command>
<execute_direct>0</execute_direct>
</function>
</functions>