diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 1f8d612a125..9f5a9b05219 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -11,7 +11,7 @@ stage=${stage:-} # A variable to pass additional flags to CMake. # Here we explicitly default it to nothing so that bash doesn't complain about -# it being undefined. Also read it as array so that we can pass an empty list +# it being undefined. Also read it as array so that we can pass an empty list # of additional variable to cmake properly, and it doesn't generate an extra # empty parameter. read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}" @@ -128,6 +128,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/ +ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/ #ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/ diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 2ff15ca9c6a..4a9ad891883 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -24,6 +24,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/ +ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/ diff --git a/docker/test/stateless_unbundled/run.sh b/docker/test/stateless_unbundled/run.sh index 2ff15ca9c6a..4a9ad891883 100755 --- a/docker/test/stateless_unbundled/run.sh +++ b/docker/test/stateless_unbundled/run.sh @@ -24,6 +24,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/ +ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/ diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh index 64317ee62fd..c3ccb18659b 100755 --- a/docker/test/stateless_with_coverage/run.sh +++ b/docker/test/stateless_with_coverage/run.sh @@ -57,6 +57,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/ +ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/ ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/ diff --git a/src/Common/tests/CMakeLists.txt b/src/Common/tests/CMakeLists.txt index f6c232cdd22..8de9424e044 100644 --- a/src/Common/tests/CMakeLists.txt +++ b/src/Common/tests/CMakeLists.txt @@ -84,3 +84,6 @@ target_link_libraries (procfs_metrics_provider_perf PRIVATE clickhouse_common_io add_executable (average average.cpp) target_link_libraries (average PRIVATE clickhouse_common_io) + +add_executable (shell_command_inout shell_command_inout.cpp) +target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io) diff --git a/src/Common/tests/shell_command_inout.cpp b/src/Common/tests/shell_command_inout.cpp new file mode 100644 index 00000000000..615700cd042 --- /dev/null +++ b/src/Common/tests/shell_command_inout.cpp @@ -0,0 +1,47 @@ +#include + +#include +#include + +#include +#include +#include + +/** This example shows how we can proxy stdin to ShellCommand and obtain stdout in streaming fashion. */ + +int main(int argc, char ** argv) +try +{ + using namespace DB; + + if (argc < 2) + { + std::cerr << "Usage: shell_command_inout 'command...' < in > out\n"; + return 1; + } + + auto command = ShellCommand::execute(argv[1]); + + ReadBufferFromFileDescriptor in(STDIN_FILENO); + WriteBufferFromFileDescriptor out(STDOUT_FILENO); + WriteBufferFromFileDescriptor err(STDERR_FILENO); + + /// Background thread sends data and foreground thread receives result. + + std::thread thread([&] + { + copyData(in, command->in); + command->in.close(); + }); + + copyData(command->out, out); + copyData(command->err, err); + + thread.join(); + return 0; +} +catch (...) +{ + std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; + throw; +} diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 918cf0732ab..74aab610e0d 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -1,12 +1,13 @@ #include "ExecutableDictionarySource.h" -#include -#include +#include #include #include #include #include #include +#include +#include #include #include #include @@ -16,6 +17,7 @@ #include "DictionaryStructure.h" #include "registerDictionaries.h" + namespace DB { static const UInt64 max_block_size = 8192; @@ -31,15 +33,23 @@ namespace /// Owns ShellCommand and calls wait for it. class ShellCommandOwningBlockInputStream : public OwningBlockInputStream { + private: + Poco::Logger * log; public: - ShellCommandOwningBlockInputStream(const BlockInputStreamPtr & impl, std::unique_ptr own_) - : OwningBlockInputStream(std::move(impl), std::move(own_)) + ShellCommandOwningBlockInputStream(Poco::Logger * log_, const BlockInputStreamPtr & impl, std::unique_ptr command_) + : OwningBlockInputStream(std::move(impl), std::move(command_)), log(log_) { } void readSuffix() override { OwningBlockInputStream::readSuffix(); + + std::string err; + readStringUntilEOF(err, own->err); + if (!err.empty()) + LOG_ERROR(log, "Having stderr: {}", err); + own->wait(); } }; @@ -80,7 +90,7 @@ BlockInputStreamPtr ExecutableDictionarySource::loadAll() LOG_TRACE(log, "loadAll {}", toString()); auto process = ShellCommand::execute(command); auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size); - return std::make_shared(input_stream, std::move(process)); + return std::make_shared(log, input_stream, std::move(process)); } BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll() @@ -95,67 +105,77 @@ BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll() LOG_TRACE(log, "loadUpdatedAll {}", command_with_update_field); auto process = ShellCommand::execute(command_with_update_field); auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size); - return std::make_shared(input_stream, std::move(process)); + return std::make_shared(log, input_stream, std::move(process)); } namespace { - /** A stream, that also runs and waits for background thread - * (that will feed data into pipe to be read from the other side of the pipe). + /** A stream, that runs child process and sends data to its stdin in background thread, + * and receives data from its stdout. */ class BlockInputStreamWithBackgroundThread final : public IBlockInputStream { public: BlockInputStreamWithBackgroundThread( - const BlockInputStreamPtr & stream_, std::unique_ptr && command_, std::packaged_task && task_) - : stream{stream_}, command{std::move(command_)}, task(std::move(task_)), thread([this] { - task(); - command->in.close(); - }) + const Context & context, + const std::string & format, + const Block & sample_block, + const std::string & command_str, + Poco::Logger * log_, + std::function && send_data_) + : log(log_), + command(ShellCommand::execute(command_str)), + send_data(std::move(send_data_)), + thread([this] { send_data(command->in); }) { - children.push_back(stream); + //WriteBufferFromFileDescriptor err(STDERR_FILENO); + //copyData(command->out, err); + //err.next(); + //thread.join(); + stream = context.getInputFormat(format, command->out, sample_block, max_block_size); } ~BlockInputStreamWithBackgroundThread() override { if (thread.joinable()) - { - try - { - readSuffix(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } + thread.join(); } - Block getHeader() const override { return stream->getHeader(); } + Block getHeader() const override + { + return stream->getHeader(); + } private: - Block readImpl() override { return stream->read(); } + Block readImpl() override + { + return stream->read(); + } + + void readPrefix() override + { + stream->readPrefix(); + } void readSuffix() override { - IBlockInputStream::readSuffix(); - if (!wait_called) - { - wait_called = true; - command->wait(); - } - thread.join(); - /// To rethrow an exception, if any. - task.get_future().get(); + stream->readSuffix(); + + std::string err; + readStringUntilEOF(err, command->err); + if (!err.empty()) + LOG_ERROR(log, "Having stderr: {}", err); + + command->wait(); } String getName() const override { return "WithBackgroundThread"; } + Poco::Logger * log; BlockInputStreamPtr stream; std::unique_ptr command; - std::packaged_task task; - ThreadFromGlobalPool thread; - bool wait_called = false; + std::function send_data; + mutable ThreadFromGlobalPool thread; }; } @@ -164,28 +184,29 @@ namespace BlockInputStreamPtr ExecutableDictionarySource::loadIds(const std::vector & ids) { LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size()); - auto process = ShellCommand::execute(command); - - auto output_stream = context.getOutputFormat(format, process->in, sample_block); - auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size); return std::make_shared( - input_stream, std::move(process), std::packaged_task([output_stream, &ids]() mutable { formatIDs(output_stream, ids); })); + context, format, sample_block, command, log, + [&ids, this](WriteBufferFromFile & out) mutable + { + auto output_stream = context.getOutputFormat(format, out, sample_block); + formatIDs(output_stream, ids); + out.close(); + }); } BlockInputStreamPtr ExecutableDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) { LOG_TRACE(log, "loadKeys {} size = {}", toString(), requested_rows.size()); - auto process = ShellCommand::execute(command); - - auto output_stream = context.getOutputFormat(format, process->in, sample_block); - auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size); return std::make_shared( - input_stream, std::move(process), std::packaged_task([output_stream, key_columns, &requested_rows, this]() mutable + context, format, sample_block, command, log, + [key_columns, &requested_rows, this](WriteBufferFromFile & out) mutable { + auto output_stream = context.getOutputFormat(format, out, sample_block); formatKeys(dict_struct, output_stream, key_columns, requested_rows); - })); + out.close(); + }); } bool ExecutableDictionarySource::isModified() const diff --git a/src/Dictionaries/ExecutableDictionarySource.h b/src/Dictionaries/ExecutableDictionarySource.h index f28d71ca5e3..b2aabf26323 100644 --- a/src/Dictionaries/ExecutableDictionarySource.h +++ b/src/Dictionaries/ExecutableDictionarySource.h @@ -14,6 +14,7 @@ namespace DB /// Allows loading dictionaries from executable class ExecutableDictionarySource final : public IDictionarySource { + friend class BlockInputStreamWithBackgroundThread; public: ExecutableDictionarySource( const DictionaryStructure & dict_struct_, diff --git a/tests/config/executable_dictionary.xml b/tests/config/executable_dictionary.xml new file mode 100644 index 00000000000..50df32e2ec6 --- /dev/null +++ b/tests/config/executable_dictionary.xml @@ -0,0 +1,108 @@ + + + + executable_complex + + + JSONEachRow + cd /; clickhouse-local --input-format JSONEachRow --output-format JSONEachRow --structure 'x UInt64, y UInt64' --query "SELECT x, y, x + y AS a, x * y AS b FROM table" + + + 0 + + + 1000 + + + + + + x + UInt64 + + + y + UInt64 + + + + a + UInt64 + 0 + + + b + UInt64 + 0 + + + + + + executable_simple + + + JSONEachRow + cd /; clickhouse-local --input-format JSONEachRow --output-format JSONEachRow --structure 'x UInt64' --query "SELECT x, x + x AS a, x * x AS b FROM table" + + + 0 + + + 1000 + + + + + x + + + a + UInt64 + 0 + + + b + UInt64 + 0 + + + + + + executable_complex_direct + + + JSONEachRow + cd /; clickhouse-local --input-format JSONEachRow --output-format JSONEachRow --structure 'x UInt64, y UInt64' --query "SELECT x, y, x + y AS a, x * y AS b FROM table" + + + 0 + + + + + + + x + UInt64 + + + y + UInt64 + + + + a + UInt64 + 0 + + + b + UInt64 + 0 + + + + + diff --git a/tests/queries/0_stateless/01474_executable_dictionary.reference b/tests/queries/0_stateless/01474_executable_dictionary.reference new file mode 100644 index 00000000000..4d0994b08c3 --- /dev/null +++ b/tests/queries/0_stateless/01474_executable_dictionary.reference @@ -0,0 +1,3 @@ +999999 1999998 999998000001 +999999 1999998 999998000001 +999999 1999998 999998000001 diff --git a/tests/queries/0_stateless/01474_executable_dictionary.sql b/tests/queries/0_stateless/01474_executable_dictionary.sql new file mode 100644 index 00000000000..727cf47f79f --- /dev/null +++ b/tests/queries/0_stateless/01474_executable_dictionary.sql @@ -0,0 +1,3 @@ +SELECT number, dictGet('executable_complex', 'a', (number, number)) AS a, dictGet('executable_complex', 'b', (number, number)) AS b FROM numbers(1000000) WHERE number = 999999; +SELECT number, dictGet('executable_complex_direct', 'a', (number, number)) AS a, dictGet('executable_complex_direct', 'b', (number, number)) AS b FROM numbers(1000000) WHERE number = 999999; +SELECT number, dictGet('executable_simple', 'a', number) AS a, dictGet('executable_simple', 'b', number) AS b FROM numbers(1000000) WHERE number = 999999;