mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Added ExecutablePool storage
This commit is contained in:
parent
befb82e441
commit
abda2a636e
@ -3,8 +3,11 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
#include <common/logger_useful.h>
|
#include <common/logger_useful.h>
|
||||||
|
#include <common/BorrowedObjectPool.h>
|
||||||
|
|
||||||
#include <Common/ShellCommand.h>
|
#include <Common/ShellCommand.h>
|
||||||
#include <Common/ThreadPool.h>
|
#include <Common/ThreadPool.h>
|
||||||
|
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <Formats/FormatFactory.h>
|
#include <Formats/FormatFactory.h>
|
||||||
#include <Processors/ISimpleTransform.h>
|
#include <Processors/ISimpleTransform.h>
|
||||||
@ -17,8 +20,9 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
/** A stream, that runs child process and sends data to its stdin in background thread,
|
/** A stream, that get child process and sends data tasks.
|
||||||
* and receives data from its stdout.
|
* For each send data task background thread is created, send data tasks must send data to process input pipes.
|
||||||
|
* ShellCommandSource receives data from process stdout.
|
||||||
*/
|
*/
|
||||||
class ShellCommandSource final : public SourceWithProgress
|
class ShellCommandSource final : public SourceWithProgress
|
||||||
{
|
{
|
||||||
@ -29,7 +33,7 @@ public:
|
|||||||
ContextPtr context,
|
ContextPtr context,
|
||||||
const std::string & format,
|
const std::string & format,
|
||||||
const Block & sample_block,
|
const Block & sample_block,
|
||||||
std::unique_ptr<ShellCommand> command_,
|
std::unique_ptr<ShellCommand> && command_,
|
||||||
Poco::Logger * log_,
|
Poco::Logger * log_,
|
||||||
std::vector<SendDataTask> && send_data_tasks,
|
std::vector<SendDataTask> && send_data_tasks,
|
||||||
size_t max_block_size = DEFAULT_BLOCK_SIZE)
|
size_t max_block_size = DEFAULT_BLOCK_SIZE)
|
||||||
@ -48,7 +52,7 @@ public:
|
|||||||
ContextPtr context,
|
ContextPtr context,
|
||||||
const std::string & format,
|
const std::string & format,
|
||||||
const Block & sample_block,
|
const Block & sample_block,
|
||||||
std::unique_ptr<ShellCommand> command_,
|
std::unique_ptr<ShellCommand> && command_,
|
||||||
Poco::Logger * log_,
|
Poco::Logger * log_,
|
||||||
size_t max_block_size = DEFAULT_BLOCK_SIZE)
|
size_t max_block_size = DEFAULT_BLOCK_SIZE)
|
||||||
: SourceWithProgress(sample_block)
|
: SourceWithProgress(sample_block)
|
||||||
@ -107,4 +111,173 @@ private:
|
|||||||
Poco::Logger * log;
|
Poco::Logger * log;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** A stream, that get child process and sends data tasks.
|
||||||
|
* For each send data task background thread is created, send data tasks must send data to process input pipes.
|
||||||
|
* ShellCommandPoolSource receives data from process stdout.
|
||||||
|
*
|
||||||
|
* Main difference with ShellCommandSource is that ShellCommandPoolSource initialized with process_pool and rows_to_read.
|
||||||
|
* Rows to read are necessary because processes in pool are not destroyed and work in read write loop.
|
||||||
|
* Source need to finish generating new chunks after rows_to_read rows are generated from process.
|
||||||
|
*
|
||||||
|
* If rows_to_read are not specified it is expected that script will output rows_to_read before other data.
|
||||||
|
*
|
||||||
|
* After source is destroyed process is returned to pool.
|
||||||
|
*/
|
||||||
|
|
||||||
|
using ProcessPool = BorrowedObjectPool<std::unique_ptr<ShellCommand>>;
|
||||||
|
|
||||||
|
class ShellCommandPoolSource final : public SourceWithProgress
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using SendDataTask = std::function<void(void)>;
|
||||||
|
|
||||||
|
ShellCommandPoolSource(
|
||||||
|
ContextPtr context,
|
||||||
|
const std::string & format,
|
||||||
|
const Block & sample_block,
|
||||||
|
std::shared_ptr<ProcessPool> process_pool_,
|
||||||
|
std::unique_ptr<ShellCommand> && command_,
|
||||||
|
size_t rows_to_read_,
|
||||||
|
Poco::Logger * log_,
|
||||||
|
std::vector<SendDataTask> && send_data_tasks)
|
||||||
|
: SourceWithProgress(sample_block)
|
||||||
|
, process_pool(process_pool_)
|
||||||
|
, command(std::move(command_))
|
||||||
|
, rows_to_read(rows_to_read_)
|
||||||
|
, log(log_)
|
||||||
|
{
|
||||||
|
for (auto && send_data_task : send_data_tasks)
|
||||||
|
{
|
||||||
|
send_data_threads.emplace_back([task = std::move(send_data_task), this]()
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
task();
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(send_data_lock);
|
||||||
|
exception_during_send_data = std::current_exception();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, rows_to_read)));
|
||||||
|
executor = std::make_unique<PullingPipelineExecutor>(pipeline);
|
||||||
|
}
|
||||||
|
|
||||||
|
ShellCommandPoolSource(
|
||||||
|
ContextPtr context,
|
||||||
|
const std::string & format,
|
||||||
|
const Block & sample_block,
|
||||||
|
std::shared_ptr<ProcessPool> process_pool_,
|
||||||
|
std::unique_ptr<ShellCommand> && command_,
|
||||||
|
Poco::Logger * log_,
|
||||||
|
std::vector<SendDataTask> && send_data_tasks)
|
||||||
|
: SourceWithProgress(sample_block)
|
||||||
|
, process_pool(process_pool_)
|
||||||
|
, command(std::move(command_))
|
||||||
|
, log(log_)
|
||||||
|
{
|
||||||
|
for (auto && send_data_task : send_data_tasks)
|
||||||
|
{
|
||||||
|
send_data_threads.emplace_back([task = std::move(send_data_task), this]()
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
task();
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(send_data_lock);
|
||||||
|
exception_during_send_data = std::current_exception();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
readText(rows_to_read, command->out);
|
||||||
|
pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, rows_to_read)));
|
||||||
|
executor = std::make_unique<PullingPipelineExecutor>(pipeline);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
~ShellCommandPoolSource() override
|
||||||
|
{
|
||||||
|
for (auto & thread : send_data_threads)
|
||||||
|
if (thread.joinable())
|
||||||
|
thread.join();
|
||||||
|
|
||||||
|
if (command)
|
||||||
|
process_pool->returnObject(std::move(command));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
Chunk generate() override
|
||||||
|
{
|
||||||
|
rethrowExceptionDuringReadIfNeeded();
|
||||||
|
|
||||||
|
if (current_read_rows == rows_to_read)
|
||||||
|
return {};
|
||||||
|
|
||||||
|
Chunk chunk;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
if (!executor->pull(chunk))
|
||||||
|
return {};
|
||||||
|
|
||||||
|
current_read_rows += chunk.getNumRows();
|
||||||
|
}
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
tryLogCurrentException(log);
|
||||||
|
command = nullptr;
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
|
||||||
|
return chunk;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
Status prepare() override
|
||||||
|
{
|
||||||
|
auto status = SourceWithProgress::prepare();
|
||||||
|
|
||||||
|
if (status == Status::Finished)
|
||||||
|
{
|
||||||
|
for (auto & thread : send_data_threads)
|
||||||
|
if (thread.joinable())
|
||||||
|
thread.join();
|
||||||
|
|
||||||
|
rethrowExceptionDuringReadIfNeeded();
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
void rethrowExceptionDuringReadIfNeeded()
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(send_data_lock);
|
||||||
|
if (exception_during_send_data)
|
||||||
|
{
|
||||||
|
command = nullptr;
|
||||||
|
std::rethrow_exception(exception_during_send_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override { return "ShellCommandPoolSource"; }
|
||||||
|
|
||||||
|
std::shared_ptr<ProcessPool> process_pool;
|
||||||
|
std::unique_ptr<ShellCommand> command;
|
||||||
|
QueryPipeline pipeline;
|
||||||
|
std::unique_ptr<PullingPipelineExecutor> executor;
|
||||||
|
size_t rows_to_read = 0;
|
||||||
|
Poco::Logger * log;
|
||||||
|
std::vector<ThreadFromGlobalPool> send_data_threads;
|
||||||
|
|
||||||
|
size_t current_read_rows = 0;
|
||||||
|
|
||||||
|
std::mutex send_data_lock;
|
||||||
|
std::exception_ptr exception_during_send_data;
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
#include "ExecutableDictionarySource.h"
|
#include "ExecutableDictionarySource.h"
|
||||||
|
|
||||||
#include <functional>
|
|
||||||
#include <common/logger_useful.h>
|
#include <common/logger_useful.h>
|
||||||
#include <common/LocalDateTime.h>
|
#include <common/LocalDateTime.h>
|
||||||
#include <Common/ShellCommand.h>
|
#include <Common/ShellCommand.h>
|
||||||
@ -125,8 +124,7 @@ Pipe ExecutableDictionarySource::getStreamForBlock(const Block & block)
|
|||||||
formatBlock(output_stream, block);
|
formatBlock(output_stream, block);
|
||||||
out.close();
|
out.close();
|
||||||
}};
|
}};
|
||||||
|
std::vector<ShellCommandSource::SendDataTask> tasks = {std::move(task)};
|
||||||
std::vector<ShellCommandSource::SendDataTask> tasks = {task};
|
|
||||||
|
|
||||||
Pipe pipe(std::make_unique<ShellCommandSource>(context, configuration.format, sample_block, std::move(process), log, std::move(tasks)));
|
Pipe pipe(std::make_unique<ShellCommandSource>(context, configuration.format, sample_block, std::move(process), log, std::move(tasks)));
|
||||||
|
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "DictionaryStructure.h"
|
#include <common/logger_useful.h>
|
||||||
#include "IDictionarySource.h"
|
|
||||||
#include <Core/Block.h>
|
#include <Core/Block.h>
|
||||||
#include <Interpreters/Context.h>
|
#include <Interpreters/Context.h>
|
||||||
|
|
||||||
namespace Poco { class Logger; }
|
#include <Dictionaries/IDictionarySource.h>
|
||||||
|
#include <Dictionaries/DictionaryStructure.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
|
@ -1,24 +1,21 @@
|
|||||||
#include "ExecutablePoolDictionarySource.h"
|
#include "ExecutablePoolDictionarySource.h"
|
||||||
|
|
||||||
#include <functional>
|
#include <common/logger_useful.h>
|
||||||
#include <common/scope_guard.h>
|
#include <common/LocalDateTime.h>
|
||||||
#include <Processors/Sources/SourceWithProgress.h>
|
#include <Common/ShellCommand.h>
|
||||||
#include <Processors/Executors/PullingPipelineExecutor.h>
|
|
||||||
#include <Processors/QueryPipeline.h>
|
#include <IO/WriteHelpers.h>
|
||||||
|
#include <IO/ReadHelpers.h>
|
||||||
|
|
||||||
#include <DataStreams/formatBlock.h>
|
#include <DataStreams/formatBlock.h>
|
||||||
|
|
||||||
#include <Interpreters/Context.h>
|
#include <Interpreters/Context.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <Formats/FormatFactory.h>
|
|
||||||
#include <Processors/Formats/IInputFormat.h>
|
#include <Dictionaries/DictionarySourceFactory.h>
|
||||||
#include <Common/ShellCommand.h>
|
#include <Dictionaries/DictionarySourceHelpers.h>
|
||||||
#include <Common/ThreadPool.h>
|
#include <Dictionaries/DictionaryStructure.h>
|
||||||
#include <common/logger_useful.h>
|
|
||||||
#include <common/LocalDateTime.h>
|
|
||||||
#include "DictionarySourceFactory.h"
|
|
||||||
#include "DictionarySourceHelpers.h"
|
|
||||||
#include "DictionaryStructure.h"
|
|
||||||
#include "registerDictionaries.h"
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -37,13 +34,13 @@ ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(
|
|||||||
const Configuration & configuration_,
|
const Configuration & configuration_,
|
||||||
Block & sample_block_,
|
Block & sample_block_,
|
||||||
ContextPtr context_)
|
ContextPtr context_)
|
||||||
: log(&Poco::Logger::get("ExecutablePoolDictionarySource"))
|
: dict_struct(dict_struct_)
|
||||||
, dict_struct{dict_struct_}
|
, configuration(configuration_)
|
||||||
, configuration{configuration_}
|
, sample_block(sample_block_)
|
||||||
, sample_block{sample_block_}
|
, context(context_)
|
||||||
, context{context_}
|
|
||||||
/// If pool size == 0 then there is no size restrictions. Poco max size of semaphore is integer type.
|
/// If pool size == 0 then there is no size restrictions. Poco max size of semaphore is integer type.
|
||||||
, process_pool{std::make_shared<ProcessPool>(configuration.pool_size == 0 ? std::numeric_limits<int>::max() : configuration.pool_size)}
|
, process_pool(std::make_shared<ProcessPool>(configuration.pool_size == 0 ? std::numeric_limits<int>::max() : configuration.pool_size))
|
||||||
|
, log(&Poco::Logger::get("ExecutablePoolDictionarySource"))
|
||||||
{
|
{
|
||||||
/// Remove keys from sample_block for implicit_key dictionary because
|
/// Remove keys from sample_block for implicit_key dictionary because
|
||||||
/// these columns will not be returned from source
|
/// these columns will not be returned from source
|
||||||
@ -62,13 +59,12 @@ ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(
|
|||||||
}
|
}
|
||||||
|
|
||||||
ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(const ExecutablePoolDictionarySource & other)
|
ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(const ExecutablePoolDictionarySource & other)
|
||||||
: log(&Poco::Logger::get("ExecutablePoolDictionarySource"))
|
: dict_struct(other.dict_struct)
|
||||||
, update_time{other.update_time}
|
, configuration(other.configuration)
|
||||||
, dict_struct{other.dict_struct}
|
, sample_block(other.sample_block)
|
||||||
, configuration{other.configuration}
|
, context(Context::createCopy(other.context))
|
||||||
, sample_block{other.sample_block}
|
, process_pool(std::make_shared<ProcessPool>(configuration.pool_size))
|
||||||
, context{Context::createCopy(other.context)}
|
, log(&Poco::Logger::get("ExecutablePoolDictionarySource"))
|
||||||
, process_pool{std::make_shared<ProcessPool>(configuration.pool_size)}
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -82,123 +78,6 @@ Pipe ExecutablePoolDictionarySource::loadUpdatedAll()
|
|||||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "ExecutablePoolDictionarySource does not support loadUpdatedAll method");
|
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "ExecutablePoolDictionarySource does not support loadUpdatedAll method");
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
/** A stream, that runs child process and sends data to its stdin in background thread,
|
|
||||||
* and receives data from its stdout.
|
|
||||||
*/
|
|
||||||
class PoolSourceWithBackgroundThread final : public SourceWithProgress
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
PoolSourceWithBackgroundThread(
|
|
||||||
std::shared_ptr<ProcessPool> process_pool_,
|
|
||||||
std::unique_ptr<ShellCommand> && command_,
|
|
||||||
Pipe pipe,
|
|
||||||
size_t read_rows_,
|
|
||||||
Poco::Logger * log_,
|
|
||||||
std::function<void(WriteBufferFromFile &)> && send_data_)
|
|
||||||
: SourceWithProgress(pipe.getHeader())
|
|
||||||
, process_pool(process_pool_)
|
|
||||||
, command(std::move(command_))
|
|
||||||
, rows_to_read(read_rows_)
|
|
||||||
, log(log_)
|
|
||||||
, send_data(std::move(send_data_))
|
|
||||||
, thread([this]
|
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
|
||||||
send_data(command->in);
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lck(exception_during_read_lock);
|
|
||||||
exception_during_read = std::current_exception();
|
|
||||||
}
|
|
||||||
})
|
|
||||||
{
|
|
||||||
pipeline.init(std::move(pipe));
|
|
||||||
executor = std::make_unique<PullingPipelineExecutor>(pipeline);
|
|
||||||
}
|
|
||||||
|
|
||||||
~PoolSourceWithBackgroundThread() override
|
|
||||||
{
|
|
||||||
if (thread.joinable())
|
|
||||||
thread.join();
|
|
||||||
|
|
||||||
if (command)
|
|
||||||
process_pool->returnObject(std::move(command));
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
Chunk generate() override
|
|
||||||
{
|
|
||||||
rethrowExceptionDuringReadIfNeeded();
|
|
||||||
|
|
||||||
if (current_read_rows == rows_to_read)
|
|
||||||
return {};
|
|
||||||
|
|
||||||
Chunk chunk;
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
if (!executor->pull(chunk))
|
|
||||||
return {};
|
|
||||||
|
|
||||||
current_read_rows += chunk.getNumRows();
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
tryLogCurrentException(log);
|
|
||||||
command = nullptr;
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
|
|
||||||
return chunk;
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
Status prepare() override
|
|
||||||
{
|
|
||||||
auto status = SourceWithProgress::prepare();
|
|
||||||
|
|
||||||
if (status == Status::Finished)
|
|
||||||
{
|
|
||||||
if (thread.joinable())
|
|
||||||
thread.join();
|
|
||||||
|
|
||||||
rethrowExceptionDuringReadIfNeeded();
|
|
||||||
}
|
|
||||||
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
void rethrowExceptionDuringReadIfNeeded()
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lck(exception_during_read_lock);
|
|
||||||
if (exception_during_read)
|
|
||||||
{
|
|
||||||
command = nullptr;
|
|
||||||
std::rethrow_exception(exception_during_read);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
String getName() const override { return "PoolWithBackgroundThread"; }
|
|
||||||
|
|
||||||
std::shared_ptr<ProcessPool> process_pool;
|
|
||||||
std::unique_ptr<ShellCommand> command;
|
|
||||||
QueryPipeline pipeline;
|
|
||||||
std::unique_ptr<PullingPipelineExecutor> executor;
|
|
||||||
size_t rows_to_read;
|
|
||||||
Poco::Logger * log;
|
|
||||||
std::function<void(WriteBufferFromFile &)> send_data;
|
|
||||||
ThreadFromGlobalPool thread;
|
|
||||||
size_t current_read_rows = 0;
|
|
||||||
std::mutex exception_during_read_lock;
|
|
||||||
std::exception_ptr exception_during_read;
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
Pipe ExecutablePoolDictionarySource::loadIds(const std::vector<UInt64> & ids)
|
Pipe ExecutablePoolDictionarySource::loadIds(const std::vector<UInt64> & ids)
|
||||||
{
|
{
|
||||||
LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
|
LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
|
||||||
@ -228,19 +107,20 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block)
|
|||||||
|
|
||||||
if (!result)
|
if (!result)
|
||||||
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
|
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
|
||||||
"Could not get process from pool, max command execution timeout exceeded ({}) seconds",
|
"Could not get process from pool, max command execution timeout exceeded {} seconds",
|
||||||
configuration.max_command_execution_time);
|
configuration.max_command_execution_time);
|
||||||
|
|
||||||
size_t rows_to_read = block.rows();
|
size_t rows_to_read = block.rows();
|
||||||
auto format = FormatFactory::instance().getInput(configuration.format, process->out, sample_block, context, rows_to_read);
|
auto * process_in = &process->in;
|
||||||
|
ShellCommandPoolSource::SendDataTask task = [process_in, block, this]() mutable
|
||||||
|
{
|
||||||
|
auto & out = *process_in;
|
||||||
|
auto output_stream = context->getOutputStream(configuration.format, out, block.cloneEmpty());
|
||||||
|
formatBlock(output_stream, block);
|
||||||
|
};
|
||||||
|
std::vector<ShellCommandPoolSource::SendDataTask> tasks = {std::move(task)};
|
||||||
|
|
||||||
Pipe pipe(std::make_unique<PoolSourceWithBackgroundThread>(
|
Pipe pipe(std::make_unique<ShellCommandPoolSource>(context, configuration.format, sample_block, process_pool, std::move(process), rows_to_read, log, std::move(tasks)));
|
||||||
process_pool, std::move(process), Pipe(std::move(format)), rows_to_read, log,
|
|
||||||
[block, this](WriteBufferFromFile & out) mutable
|
|
||||||
{
|
|
||||||
auto output_stream = context->getOutputStream(configuration.format, out, block.cloneEmpty());
|
|
||||||
formatBlock(output_stream, block);
|
|
||||||
}));
|
|
||||||
|
|
||||||
if (configuration.implicit_key)
|
if (configuration.implicit_key)
|
||||||
pipe.addTransform(std::make_shared<TransformWithAdditionalColumns>(block, pipe.getHeader()));
|
pipe.addTransform(std::make_shared<TransformWithAdditionalColumns>(block, pipe.getHeader()));
|
||||||
|
@ -1,20 +1,18 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <common/BorrowedObjectPool.h>
|
#include <common/logger_useful.h>
|
||||||
|
|
||||||
#include <Core/Block.h>
|
#include <Core/Block.h>
|
||||||
#include <Interpreters/Context.h>
|
#include <Interpreters/Context.h>
|
||||||
|
|
||||||
#include "IDictionarySource.h"
|
#include <Dictionaries/IDictionarySource.h>
|
||||||
#include "DictionaryStructure.h"
|
#include <Dictionaries/DictionaryStructure.h>
|
||||||
|
#include <DataStreams/ShellCommandSource.h>
|
||||||
namespace Poco { class Logger; }
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
using ProcessPool = BorrowedObjectPool<std::unique_ptr<ShellCommand>>;
|
|
||||||
|
|
||||||
/** ExecutablePoolDictionarySource allows loading data from pool of processes.
|
/** ExecutablePoolDictionarySource allows loading data from pool of processes.
|
||||||
* When client requests ids or keys source get process from ProcessPool
|
* When client requests ids or keys source get process from ProcessPool
|
||||||
@ -73,14 +71,13 @@ public:
|
|||||||
Pipe getStreamForBlock(const Block & block);
|
Pipe getStreamForBlock(const Block & block);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Poco::Logger * log;
|
|
||||||
time_t update_time = 0;
|
|
||||||
const DictionaryStructure dict_struct;
|
const DictionaryStructure dict_struct;
|
||||||
const Configuration configuration;
|
const Configuration configuration;
|
||||||
|
|
||||||
Block sample_block;
|
Block sample_block;
|
||||||
ContextPtr context;
|
ContextPtr context;
|
||||||
std::shared_ptr<ProcessPool> process_pool;
|
std::shared_ptr<ProcessPool> process_pool;
|
||||||
|
Poco::Logger * log;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
42
src/Storages/ExecutablePoolSettings.cpp
Normal file
42
src/Storages/ExecutablePoolSettings.cpp
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
#include "ExecutablePoolSettings.h"
|
||||||
|
|
||||||
|
#include <Common/Exception.h>
|
||||||
|
|
||||||
|
#include <Parsers/ASTCreateQuery.h>
|
||||||
|
#include <Parsers/ASTSetQuery.h>
|
||||||
|
#include <Parsers/ASTFunction.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int UNKNOWN_SETTING;
|
||||||
|
}
|
||||||
|
|
||||||
|
IMPLEMENT_SETTINGS_TRAITS(ExecutablePoolSettingsTraits, LIST_OF_EXECUTABLE_POOL_SETTINGS);
|
||||||
|
|
||||||
|
void ExecutablePoolSettings::loadFromQuery(ASTStorage & storage_def)
|
||||||
|
{
|
||||||
|
if (storage_def.settings)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
applyChanges(storage_def.settings->changes);
|
||||||
|
}
|
||||||
|
catch (Exception & e)
|
||||||
|
{
|
||||||
|
if (e.code() == ErrorCodes::UNKNOWN_SETTING)
|
||||||
|
e.addMessage("for storage " + storage_def.engine->name);
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto settings_ast = std::make_shared<ASTSetQuery>();
|
||||||
|
settings_ast->is_standalone = false;
|
||||||
|
storage_def.set(storage_def.settings, settings_ast);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
24
src/Storages/ExecutablePoolSettings.h
Normal file
24
src/Storages/ExecutablePoolSettings.h
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Core/Defines.h>
|
||||||
|
#include <Core/BaseSettings.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
class ASTStorage;
|
||||||
|
|
||||||
|
#define LIST_OF_EXECUTABLE_POOL_SETTINGS(M) \
|
||||||
|
M(UInt64, pool_size, 16, "Processes pool size. If size == 0, then there is no size restrictions", 0) \
|
||||||
|
M(UInt64, max_command_execution_time, 10, "Max command execution time in seconds.", 0) \
|
||||||
|
M(UInt64, command_termination_timeout, 10, "Command termination timeout in seconds.", 0) \
|
||||||
|
|
||||||
|
DECLARE_SETTINGS_TRAITS(ExecutablePoolSettingsTraits, LIST_OF_EXECUTABLE_POOL_SETTINGS)
|
||||||
|
|
||||||
|
/// Settings for ExecutablePool engine.
|
||||||
|
struct ExecutablePoolSettings : public BaseSettings<ExecutablePoolSettingsTraits>
|
||||||
|
{
|
||||||
|
void loadFromQuery(ASTStorage & storage_def);
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -4,16 +4,18 @@
|
|||||||
|
|
||||||
#include <Common/ShellCommand.h>
|
#include <Common/ShellCommand.h>
|
||||||
#include <Core/Block.h>
|
#include <Core/Block.h>
|
||||||
|
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
#include <Parsers/ASTLiteral.h>
|
#include <Parsers/ASTLiteral.h>
|
||||||
#include <Parsers/ASTSelectWithUnionQuery.h>
|
#include <Parsers/ASTSelectWithUnionQuery.h>
|
||||||
|
#include <Parsers/ASTCreateQuery.h>
|
||||||
|
|
||||||
|
#include <DataStreams/IBlockInputStream.h>
|
||||||
#include <Processors/Pipe.h>
|
#include <Processors/Pipe.h>
|
||||||
#include <Interpreters/Context.h>
|
#include <Interpreters/Context.h>
|
||||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||||
#include <Interpreters/evaluateConstantExpression.h>
|
#include <Interpreters/evaluateConstantExpression.h>
|
||||||
#include <Storages/StorageFactory.h>
|
#include <Storages/StorageFactory.h>
|
||||||
#include <DataStreams/IBlockInputStream.h>
|
|
||||||
#include <DataStreams/ShellCommandSource.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -24,6 +26,7 @@ namespace ErrorCodes
|
|||||||
extern const int UNSUPPORTED_METHOD;
|
extern const int UNSUPPORTED_METHOD;
|
||||||
extern const int LOGICAL_ERROR;
|
extern const int LOGICAL_ERROR;
|
||||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
|
extern const int TIMEOUT_EXCEEDED;
|
||||||
}
|
}
|
||||||
|
|
||||||
StorageExecutable::StorageExecutable(
|
StorageExecutable::StorageExecutable(
|
||||||
@ -47,6 +50,31 @@ StorageExecutable::StorageExecutable(
|
|||||||
setInMemoryMetadata(storage_metadata);
|
setInMemoryMetadata(storage_metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
StorageExecutable::StorageExecutable(
|
||||||
|
const StorageID & table_id_,
|
||||||
|
const String & script_name_,
|
||||||
|
const std::vector<String> & arguments_,
|
||||||
|
const String & format_,
|
||||||
|
const std::vector<ASTPtr> & input_queries_,
|
||||||
|
const ExecutablePoolSettings & pool_settings_,
|
||||||
|
const ColumnsDescription & columns,
|
||||||
|
const ConstraintsDescription & constraints)
|
||||||
|
: IStorage(table_id_)
|
||||||
|
, script_name(script_name_)
|
||||||
|
, arguments(arguments_)
|
||||||
|
, format(format_)
|
||||||
|
, input_queries(input_queries_)
|
||||||
|
, pool_settings(pool_settings_)
|
||||||
|
/// If pool size == 0 then there is no size restrictions. Poco max size of semaphore is integer type.
|
||||||
|
, process_pool(std::make_shared<ProcessPool>(pool_settings.pool_size == 0 ? std::numeric_limits<int>::max() : pool_settings.pool_size))
|
||||||
|
, log(&Poco::Logger::get("StorageExecutablePool"))
|
||||||
|
{
|
||||||
|
StorageInMemoryMetadata storage_metadata;
|
||||||
|
storage_metadata.setColumns(columns);
|
||||||
|
storage_metadata.setConstraints(constraints);
|
||||||
|
setInMemoryMetadata(storage_metadata);
|
||||||
|
}
|
||||||
|
|
||||||
Pipe StorageExecutable::read(
|
Pipe StorageExecutable::read(
|
||||||
const Names & /*column_names*/,
|
const Names & /*column_names*/,
|
||||||
const StorageMetadataPtr & metadata_snapshot,
|
const StorageMetadataPtr & metadata_snapshot,
|
||||||
@ -56,6 +84,8 @@ Pipe StorageExecutable::read(
|
|||||||
size_t max_block_size,
|
size_t max_block_size,
|
||||||
unsigned /*threads*/)
|
unsigned /*threads*/)
|
||||||
{
|
{
|
||||||
|
std::cerr << getName() << "::read" << std::endl;
|
||||||
|
|
||||||
auto user_scripts_path = context->getUserScriptsPath();
|
auto user_scripts_path = context->getUserScriptsPath();
|
||||||
auto script_path = user_scripts_path + '/' + script_name;
|
auto script_path = user_scripts_path + '/' + script_name;
|
||||||
if (!std::filesystem::exists(std::filesystem::path(script_path)))
|
if (!std::filesystem::exists(std::filesystem::path(script_path)))
|
||||||
@ -79,7 +109,27 @@ Pipe StorageExecutable::read(
|
|||||||
for (size_t i = 1; i < inputs.size(); ++i)
|
for (size_t i = 1; i < inputs.size(); ++i)
|
||||||
config.write_fds.emplace_back(i + 2);
|
config.write_fds.emplace_back(i + 2);
|
||||||
|
|
||||||
auto process = ShellCommand::executeDirect(config);
|
std::unique_ptr<ShellCommand> process;
|
||||||
|
|
||||||
|
if (process_pool)
|
||||||
|
{
|
||||||
|
std::cerr << getName() <<"::read create process" << std::endl;
|
||||||
|
bool result = process_pool->tryBorrowObject(process, [&config, this]()
|
||||||
|
{
|
||||||
|
config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, pool_settings.command_termination_timeout };
|
||||||
|
auto shell_command = ShellCommand::execute(config);
|
||||||
|
return shell_command;
|
||||||
|
}, pool_settings.max_command_execution_time * 10000);
|
||||||
|
|
||||||
|
if (!result)
|
||||||
|
throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
|
||||||
|
"Could not get process from pool, max command execution timeout exceeded {} seconds",
|
||||||
|
pool_settings.max_command_execution_time);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
process = ShellCommand::executeDirect(config);
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<ShellCommandSource::SendDataTask> tasks;
|
std::vector<ShellCommandSource::SendDataTask> tasks;
|
||||||
tasks.reserve(inputs.size());
|
tasks.reserve(inputs.size());
|
||||||
@ -123,13 +173,22 @@ Pipe StorageExecutable::read(
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto sample_block = metadata_snapshot->getSampleBlock();
|
auto sample_block = metadata_snapshot->getSampleBlock();
|
||||||
Pipe pipe(std::make_unique<ShellCommandSource>(context, format, sample_block, std::move(process), log, std::move(tasks), max_block_size));
|
|
||||||
return pipe;
|
if (process_pool)
|
||||||
|
{
|
||||||
|
Pipe pipe(std::make_unique<ShellCommandPoolSource>(context, format, std::move(sample_block), process_pool, std::move(process), log, std::move(tasks)));
|
||||||
|
return pipe;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Pipe pipe(std::make_unique<ShellCommandSource>(context, format, std::move(sample_block), std::move(process), log, std::move(tasks), max_block_size));
|
||||||
|
return pipe;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void registerStorageExecutable(StorageFactory & factory)
|
void registerStorageExecutable(StorageFactory & factory)
|
||||||
{
|
{
|
||||||
factory.registerStorage("Executable", [](const StorageFactory::Arguments & args)
|
auto register_storage = [](const StorageFactory::Arguments & args, bool is_executable_pool) -> StoragePtr
|
||||||
{
|
{
|
||||||
auto local_context = args.getLocalContext();
|
auto local_context = args.getLocalContext();
|
||||||
|
|
||||||
@ -143,7 +202,7 @@ void registerStorageExecutable(StorageFactory & factory)
|
|||||||
auto scipt_name_with_arguments_value = args.engine_args[0]->as<ASTLiteral &>().value.safeGet<String>();
|
auto scipt_name_with_arguments_value = args.engine_args[0]->as<ASTLiteral &>().value.safeGet<String>();
|
||||||
|
|
||||||
std::vector<String> script_name_with_arguments;
|
std::vector<String> script_name_with_arguments;
|
||||||
boost::split(script_name_with_arguments, scipt_name_with_arguments_value, [](char c){ return c == ' '; });
|
boost::split(script_name_with_arguments, scipt_name_with_arguments_value, [](char c) { return c == ' '; });
|
||||||
|
|
||||||
auto script_name = script_name_with_arguments[0];
|
auto script_name = script_name_with_arguments[0];
|
||||||
script_name_with_arguments.erase(script_name_with_arguments.begin());
|
script_name_with_arguments.erase(script_name_with_arguments.begin());
|
||||||
@ -154,8 +213,8 @@ void registerStorageExecutable(StorageFactory & factory)
|
|||||||
{
|
{
|
||||||
ASTPtr query = args.engine_args[i]->children.at(0);
|
ASTPtr query = args.engine_args[i]->children.at(0);
|
||||||
if (!query->as<ASTSelectWithUnionQuery>())
|
if (!query->as<ASTSelectWithUnionQuery>())
|
||||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
throw Exception(
|
||||||
"StorageExecutable argument is invalid input query {}",
|
ErrorCodes::UNSUPPORTED_METHOD, "StorageExecutable argument is invalid input query {}",
|
||||||
query->formatForErrorMessage());
|
query->formatForErrorMessage());
|
||||||
|
|
||||||
input_queries.emplace_back(std::move(query));
|
input_queries.emplace_back(std::move(query));
|
||||||
@ -164,7 +223,35 @@ void registerStorageExecutable(StorageFactory & factory)
|
|||||||
const auto & columns = args.columns;
|
const auto & columns = args.columns;
|
||||||
const auto & constraints = args.constraints;
|
const auto & constraints = args.constraints;
|
||||||
|
|
||||||
return StorageExecutable::create(args.table_id, script_name, script_name_with_arguments, format, input_queries, columns, constraints);
|
if (is_executable_pool)
|
||||||
|
{
|
||||||
|
size_t max_command_execution_time = 10;
|
||||||
|
|
||||||
|
size_t max_execution_time_seconds = static_cast<size_t>(args.getContext()->getSettings().max_execution_time.totalSeconds());
|
||||||
|
if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds)
|
||||||
|
max_command_execution_time = max_execution_time_seconds;
|
||||||
|
|
||||||
|
ExecutablePoolSettings pool_settings;
|
||||||
|
pool_settings.max_command_execution_time = max_command_execution_time;
|
||||||
|
if (args.storage_def->settings)
|
||||||
|
pool_settings.loadFromQuery(*args.storage_def);
|
||||||
|
|
||||||
|
return StorageExecutable::create(args.table_id, script_name, script_name_with_arguments, format, input_queries, pool_settings, columns, constraints);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return StorageExecutable::create(args.table_id, script_name, script_name_with_arguments, format, input_queries, columns, constraints);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
factory.registerStorage("Executable", [&](const StorageFactory::Arguments & args)
|
||||||
|
{
|
||||||
|
return register_storage(args, false /*is_executable_pool*/);
|
||||||
|
});
|
||||||
|
|
||||||
|
factory.registerStorage("ExecutablePool", [&](const StorageFactory::Arguments & args)
|
||||||
|
{
|
||||||
|
return register_storage(args, true /*is_executable_pool*/);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,19 +3,30 @@
|
|||||||
#include <common/logger_useful.h>
|
#include <common/logger_useful.h>
|
||||||
#include <common/shared_ptr_helper.h>
|
#include <common/shared_ptr_helper.h>
|
||||||
#include <Storages/IStorage.h>
|
#include <Storages/IStorage.h>
|
||||||
#include <IO/CompressionMethod.h>
|
#include <DataStreams/ShellCommandSource.h>
|
||||||
|
#include <Storages/ExecutablePoolSettings.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class represents table engine for external executable files.
|
* This class represents table engine for external executable files.
|
||||||
|
* Executable storage that will start process for read.
|
||||||
|
* ExecutablePool storage maintain pool of processes and take process from pool for read.
|
||||||
*/
|
*/
|
||||||
class StorageExecutable final : public shared_ptr_helper<StorageExecutable>, public IStorage
|
class StorageExecutable final : public shared_ptr_helper<StorageExecutable>, public IStorage
|
||||||
{
|
{
|
||||||
friend struct shared_ptr_helper<StorageExecutable>;
|
friend struct shared_ptr_helper<StorageExecutable>;
|
||||||
public:
|
public:
|
||||||
String getName() const override { return "Executable"; }
|
|
||||||
|
String getName() const override
|
||||||
|
{
|
||||||
|
if (process_pool)
|
||||||
|
return "ExecutablePool";
|
||||||
|
else
|
||||||
|
return "Executable";
|
||||||
|
}
|
||||||
|
|
||||||
Pipe read(
|
Pipe read(
|
||||||
const Names & column_names,
|
const Names & column_names,
|
||||||
@ -36,12 +47,24 @@ protected:
|
|||||||
const ColumnsDescription & columns,
|
const ColumnsDescription & columns,
|
||||||
const ConstraintsDescription & constraints);
|
const ConstraintsDescription & constraints);
|
||||||
|
|
||||||
|
StorageExecutable(
|
||||||
|
const StorageID & table_id,
|
||||||
|
const String & script_name_,
|
||||||
|
const std::vector<String> & arguments_,
|
||||||
|
const String & format_,
|
||||||
|
const std::vector<ASTPtr> & input_queries_,
|
||||||
|
const ExecutablePoolSettings & pool_settings_,
|
||||||
|
const ColumnsDescription & columns,
|
||||||
|
const ConstraintsDescription & constraints);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
String script_name;
|
String script_name;
|
||||||
std::vector<String> arguments;
|
std::vector<String> arguments;
|
||||||
String format;
|
String format;
|
||||||
std::vector<ASTPtr> input_queries;
|
std::vector<ASTPtr> input_queries;
|
||||||
|
ExecutablePoolSettings pool_settings;
|
||||||
|
std::shared_ptr<ProcessPool> process_pool;
|
||||||
Poco::Logger * log;
|
Poco::Logger * log;
|
||||||
};
|
};
|
||||||
}
|
|
||||||
|
|
||||||
|
}
|
||||||
|
@ -55,3 +55,15 @@ def test_executable_storage_argument(started_cluster):
|
|||||||
node.query("CREATE TABLE test_table (value String) ENGINE=Executable('test_argument.sh 1', 'TabSeparated')")
|
node.query("CREATE TABLE test_table (value String) ENGINE=Executable('test_argument.sh 1', 'TabSeparated')")
|
||||||
assert node.query("SELECT * FROM test_table") == 'Key 1\n'
|
assert node.query("SELECT * FROM test_table") == 'Key 1\n'
|
||||||
node.query("DROP TABLE test_table")
|
node.query("DROP TABLE test_table")
|
||||||
|
|
||||||
|
def test_executable_pool_storage(started_cluster):
|
||||||
|
node.query("DROP TABLE IF EXISTS test_table")
|
||||||
|
node.query("CREATE TABLE test_table (value String) ENGINE=ExecutablePool('test_input_process_pool.sh', 'TabSeparated', (SELECT 1))")
|
||||||
|
assert node.query("SELECT * FROM test_table") == 'Key 1\n'
|
||||||
|
node.query("DROP TABLE test_table")
|
||||||
|
|
||||||
|
def test_executable_pool_storage_multiple_pipes(started_cluster):
|
||||||
|
node.query("DROP TABLE IF EXISTS test_table")
|
||||||
|
node.query("CREATE TABLE test_table (value String) ENGINE=ExecutablePool('test_input_process_pool_multiple_pipes.sh', 'TabSeparated', (SELECT 1), (SELECT 2), (SELECT 3))")
|
||||||
|
assert node.query("SELECT * FROM test_table") == 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 1\n'
|
||||||
|
node.query("DROP TABLE test_table")
|
||||||
|
@ -0,0 +1,3 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
while read read_data; do printf '1'; printf "Key $read_data\n"; done
|
@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
read -t 250 -u 4 read_data_from_4_fd;
|
||||||
|
read -t 250 -u 3 read_data_from_3_fd;
|
||||||
|
read -t 250 read_data_from_0_df;
|
||||||
|
|
||||||
|
printf '3';
|
||||||
|
printf "Key from 4 fd $read_data_from_4_fd\n";
|
||||||
|
printf "Key from 3 fd $read_data_from_3_fd\n";
|
||||||
|
printf "Key from 0 fd $read_data_from_0_df\n";
|
Loading…
Reference in New Issue
Block a user