2021-04-14 17:51:55 +00:00
|
|
|
#include <Storages/StorageExecutable.h>
|
2021-08-25 19:30:22 +00:00
|
|
|
|
|
|
|
#include <filesystem>
|
|
|
|
|
|
|
|
#include <Common/ShellCommand.h>
|
|
|
|
#include <Core/Block.h>
|
2021-04-15 09:40:41 +00:00
|
|
|
#include <IO/ReadHelpers.h>
|
2021-08-29 20:19:05 +00:00
|
|
|
#include <Parsers/ASTLiteral.h>
|
|
|
|
#include <Parsers/ASTSelectWithUnionQuery.h>
|
2021-08-25 19:30:22 +00:00
|
|
|
#include <Processors/Pipe.h>
|
2021-04-14 17:51:55 +00:00
|
|
|
#include <Interpreters/Context.h>
|
2021-08-29 20:19:05 +00:00
|
|
|
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
|
|
|
#include <Interpreters/evaluateConstantExpression.h>
|
2021-04-14 17:51:55 +00:00
|
|
|
#include <Storages/StorageFactory.h>
|
2021-08-25 19:30:22 +00:00
|
|
|
#include <DataStreams/IBlockInputStream.h>
|
2021-08-24 19:38:42 +00:00
|
|
|
#include <DataStreams/ShellCommandSource.h>
|
2021-04-14 17:51:55 +00:00
|
|
|
|
2021-08-29 20:19:05 +00:00
|
|
|
|
2021-04-14 17:51:55 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2021-08-24 19:38:42 +00:00
|
|
|
|
2021-04-14 17:51:55 +00:00
|
|
|
/// Error codes thrown from this translation unit; only declared here, the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int UNSUPPORTED_METHOD;
}
|
|
|
|
|
|
|
|
/** Builds an Executable storage.
  *
  * The script name, its arguments, the data format and the input queries are copied into the
  * storage; the column and constraint descriptions are installed as the in-memory metadata.
  */
StorageExecutable::StorageExecutable(
    const StorageID & table_id_,
    const String & script_name_,
    const std::vector<String> & arguments_,
    const String & format_,
    const std::vector<ASTPtr> & input_queries_,
    const ColumnsDescription & columns,
    const ConstraintsDescription & constraints)
    : IStorage(table_id_)
    , script_name(script_name_)
    , arguments(arguments_)
    , format(format_)
    , input_queries(input_queries_)
    , log(&Poco::Logger::get("StorageExecutable"))
{
    /// Fill the metadata first, then publish it in a single call.
    StorageInMemoryMetadata metadata;
    metadata.setColumns(columns);
    metadata.setConstraints(constraints);

    setInMemoryMetadata(metadata);
}
|
|
|
|
|
|
|
|
/** Runs the configured script and returns a pipe that reads the script output.
  *
  * Steps:
  *  1. Resolve the script inside the user scripts directory taken from the context.
  *  2. Execute every input query and obtain its input stream.
  *  3. Start the script; the first input is sent to the process stdin, every following input
  *     with index i is sent to the file descriptor i + 2 (that is 3, 4, ...).
  *  4. Wrap the process, the data sending tasks and the sample block of the table
  *     into a ShellCommandSource.
  *
  * Throws UNSUPPORTED_METHOD if the script resolves outside of the user scripts directory
  * or does not exist, and LOGICAL_ERROR if the started process lacks an expected descriptor.
  */
Pipe StorageExecutable::read(
    const Names & /*column_names*/,
    const StorageMetadataPtr & metadata_snapshot,
    SelectQueryInfo & /*query_info*/,
    ContextPtr context,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size,
    unsigned /*threads*/)
{
    auto user_scripts_path = context->getUserScriptsPath();
    auto script_path = user_scripts_path + '/' + script_name;

    /// The script name comes from the table definition, so a name such as "../../bin/sh"
    /// must not be allowed to escape the user scripts directory.
    const auto canonical_scripts_dir = std::filesystem::weakly_canonical(std::filesystem::path(user_scripts_path));
    const auto canonical_script = std::filesystem::weakly_canonical(std::filesystem::path(script_path));
    const auto relative_script = canonical_script.lexically_relative(canonical_scripts_dir);

    if (relative_script.empty() || *relative_script.begin() == std::filesystem::path(".."))
        throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
            "Executable file {} must be inside user scripts folder {}",
            script_name,
            user_scripts_path);

    if (!std::filesystem::exists(std::filesystem::path(script_path)))
        throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
            "Executable file {} does not exists inside {}",
            script_name,
            user_scripts_path);

    std::vector<BlockInputStreamPtr> inputs;
    inputs.reserve(input_queries.size());

    for (const auto & input_query : input_queries)
    {
        /// NOTE(review): the interpreter gets a private clone so that query analysis cannot alter
        /// the AST stored in the storage and shared between reads - confirm the interpreter may
        /// rewrite its query in place.
        InterpreterSelectWithUnionQuery interpreter(input_query->clone(), context, {});
        inputs.emplace_back(interpreter.execute().getInputStream());
    }

    ShellCommand::Config config(script_path);
    config.arguments = arguments;

    /// The first input uses stdin, so extra descriptors are requested starting from the second input.
    for (size_t i = 1; i < inputs.size(); ++i)
        config.write_fds.emplace_back(i + 2);

    auto process = ShellCommand::executeDirect(config);

    std::vector<ShellCommandSource::SendDataTask> tasks;
    tasks.reserve(inputs.size());

    for (size_t i = 0; i < inputs.size(); ++i)
    {
        BlockInputStreamPtr input_stream = inputs[i];
        WriteBufferFromFile * write_buffer = nullptr;

        if (i == 0)
        {
            write_buffer = &process->in;
        }
        else
        {
            auto descriptor = i + 2;
            auto it = process->write_fds.find(descriptor);
            if (it == process->write_fds.end())
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Process does not contain descriptor to write {}", descriptor);

            write_buffer = &it->second;
        }

        /// The format name is copied into the task instead of capturing `this`,
        /// so the deferred task does not depend on the storage object being alive.
        ShellCommandSource::SendDataTask task = [input_stream, write_buffer, context, format_name = format]()
        {
            auto output_stream = context->getOutputStream(format_name, *write_buffer, input_stream->getHeader().cloneEmpty());
            input_stream->readPrefix();
            output_stream->writePrefix();

            while (auto block = input_stream->read())
                output_stream->write(block);

            input_stream->readSuffix();
            output_stream->writeSuffix();

            /// Flush and close the descriptor once all blocks of this input have been written.
            output_stream->flush();
            write_buffer->close();
        };

        tasks.emplace_back(std::move(task));
    }

    auto sample_block = metadata_snapshot->getSampleBlock();
    Pipe pipe(std::make_unique<ShellCommandSource>(context, format, sample_block, std::move(process), log, std::move(tasks), max_block_size));
    return pipe;
}
|
2021-08-28 19:47:59 +00:00
|
|
|
|
2021-08-29 20:19:05 +00:00
|
|
|
/** Registers the "Executable" table engine in the storage factory.
  *
  * Engine arguments: script_name, format, [input_query...]
  *  - script_name: script name optionally followed by space separated script arguments;
  *  - format: name of the data format;
  *  - input_query: every following argument must wrap a SELECT ... UNION query.
  *
  * The first two arguments are evaluated to literals before being read.
  * Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH if fewer than 2 arguments are given and
  * UNSUPPORTED_METHOD if the script name is empty or an input argument is not a select query.
  */
void registerStorageExecutable(StorageFactory & factory)
{
    factory.registerStorage("Executable", [](const StorageFactory::Arguments & args)
    {
        auto local_context = args.getLocalContext();

        if (args.engine_args.size() < 2)
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                "StorageExecutable requires minimum 2 arguments: script_name, format, [input_query...]");

        for (size_t i = 0; i < 2; ++i)
            args.engine_args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args.engine_args[i], local_context);

        auto script_name_with_arguments_value = args.engine_args[0]->as<ASTLiteral &>().value.safeGet<String>();

        std::vector<String> tokens;
        boost::split(tokens, script_name_with_arguments_value, [](char c){ return c == ' '; });

        /// Repeated, leading or trailing spaces produce empty tokens: skip them so that they become
        /// neither the script name nor empty script arguments.
        String script_name;
        std::vector<String> script_arguments;
        for (auto & token : tokens)
        {
            if (token.empty())
                continue;

            if (script_name.empty())
                script_name = std::move(token);
            else
                script_arguments.emplace_back(std::move(token));
        }

        if (script_name.empty())
            throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
                "StorageExecutable requires non empty script name");

        auto format = args.engine_args[1]->as<ASTLiteral &>().value.safeGet<String>();

        std::vector<ASTPtr> input_queries;
        for (size_t i = 2; i < args.engine_args.size(); ++i)
        {
            const auto & engine_arg = args.engine_args[i];

            /// An argument without children (for example a literal) cannot wrap a query:
            /// report it as an invalid input query instead of letting at(0) throw std::out_of_range.
            if (engine_arg->children.empty())
                throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
                    "StorageExecutable argument is invalid input query {}",
                    engine_arg->formatForErrorMessage());

            ASTPtr query = engine_arg->children.front();
            if (!query->as<ASTSelectWithUnionQuery>())
                throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
                    "StorageExecutable argument is invalid input query {}",
                    query->formatForErrorMessage());

            input_queries.emplace_back(std::move(query));
        }

        const auto & columns = args.columns;
        const auto & constraints = args.constraints;

        return StorageExecutable::create(args.table_id, script_name, script_arguments, format, input_queries, columns, constraints);
    });
}
|
|
|
|
|
2021-04-14 17:51:55 +00:00
|
|
|
};
|
|
|
|
|