Merge pull request #21630 from nikitamikhaylov/pf-local

Parallel formatting for clickhouse-local

Commit: 7d7a2bc724
@@ -1180,7 +1180,7 @@ try
     file_in.seek(0, SEEK_SET);

     BlockInputStreamPtr input = context.getInputFormat(input_format, file_in, header, max_block_size);
-    BlockOutputStreamPtr output = context.getOutputStream(output_format, file_out, header);
+    BlockOutputStreamPtr output = context.getOutputStreamParallelIfPossible(output_format, file_out, header);

     if (processed_rows + source_rows > limit)
         input = std::make_shared<LimitBlockInputStream>(input, limit - processed_rows, 0);
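The substitution above (getOutputStream -> getOutputStreamParallelIfPossible) recurs throughout this commit: call sites always request the parallel output stream, and the context/factory is expected to fall back to the plain serial stream whenever the chosen format cannot be formatted in parallel. A minimal self-contained sketch of that "parallel if possible" fallback, using invented names (OutputStream, SerialStream, ParallelStream) rather than ClickHouse's real Context/FormatFactory API:

#include <iostream>
#include <memory>
#include <string>

// Hypothetical stand-ins for ClickHouse's block output streams; names are
// illustrative only, not the real IBlockOutputStream interface.
struct OutputStream
{
    virtual void write(const std::string & row) = 0;
    virtual ~OutputStream() = default;
};

struct SerialStream : OutputStream
{
    void write(const std::string & row) override { std::cout << row << '\n'; }
};

struct ParallelStream : OutputStream
{
    // In the real implementation rows are formatted by a thread pool and
    // written out in the original order; here we only tag the output.
    void write(const std::string & row) override { std::cout << "[parallel] " << row << '\n'; }
};

// "ParallelIfPossible": return the parallel wrapper only when the format and
// the settings allow it, otherwise silently fall back to the serial stream.
std::unique_ptr<OutputStream> getOutputStreamParallelIfPossible(bool format_supports_parallel, bool setting_enabled)
{
    if (format_supports_parallel && setting_enabled)
        return std::make_unique<ParallelStream>();
    return std::make_unique<SerialStream>();
}

int main()
{
    auto out = getOutputStreamParallelIfPossible(/*format_supports_parallel=*/true, /*setting_enabled=*/true);
    out->write("1\tAlice");
    out->write("2\tBob");
}

The point of the helper is that callers never need to know which formats support parallel formatting; the decision is centralized in one place.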
@@ -176,7 +176,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
     std::string query = params.get("query");
     LOG_TRACE(log, "Query: {}", query);

-    BlockOutputStreamPtr writer = FormatFactory::instance().getOutputStream(format, out, *sample_block, context);
+    BlockOutputStreamPtr writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, out, *sample_block, context);
     auto pool = getPool(connection_string);
     ODBCBlockInputStream inp(pool->get(), query, *sample_block, max_block_size);
     copyData(inp, *writer);
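The ODBC bridge keeps the same pull/push pipeline: rows come out of ODBCBlockInputStream and copyData feeds them into the (now possibly parallel) output stream. A toy sketch of a copyData-style loop, with stand-in Input/Output types instead of ClickHouse's block stream interfaces:

#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Toy stand-ins for the block stream interfaces; the real copyData also
// forwards progress, totals and extremes, which is omitted here.
struct Input
{
    std::vector<std::string> blocks;
    size_t pos = 0;
    std::optional<std::string> read() { return pos < blocks.size() ? std::optional{blocks[pos++]} : std::nullopt; }
};

struct Output
{
    void writePrefix() { std::cout << "-- begin --\n"; }
    void write(const std::string & block) { std::cout << block << '\n'; }
    void writeSuffix() { std::cout << "-- end --\n"; }
};

// copyData-style loop: drain the input and feed every block to the output.
void copyData(Input & in, Output & out)
{
    out.writePrefix();
    while (auto block = in.read())
        out.write(*block);
    out.writeSuffix();
}

int main()
{
    Input in{{"block 1", "block 2"}, 0};
    Output out;
    copyData(in, out);
}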
@@ -136,7 +136,7 @@ BlockInputStreamPtr HTTPDictionarySource::loadIds(const std::vector<UInt64> & id
     ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [block, this](std::ostream & ostr)
     {
         WriteBufferFromOStream out_buffer(ostr);
-        auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
+        auto output_stream = context.getOutputStreamParallelIfPossible(format, out_buffer, sample_block);
         formatBlock(output_stream, block);
     };

@@ -157,7 +157,7 @@ BlockInputStreamPtr HTTPDictionarySource::loadKeys(const Columns & key_columns,
     ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [block, this](std::ostream & ostr)
     {
         WriteBufferFromOStream out_buffer(ostr);
-        auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
+        auto output_stream = context.getOutputStreamParallelIfPossible(format, out_buffer, sample_block);
         formatBlock(output_stream, block);
     };

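In both dictionary hunks above, serialization happens inside ReadWriteBufferFromHTTP::OutStreamCallback: the HTTP layer hands the request-body stream to a lambda, and the lambda formats the block into it. A rough self-contained sketch of that callback pattern, with a hypothetical postRequest helper standing in for the real HTTP buffer:

#include <functional>
#include <iostream>
#include <sstream>
#include <vector>

// Illustrative stand-in for ReadWriteBufferFromHTTP::OutStreamCallback: the
// HTTP client calls the callback with the request-body stream and the caller
// decides how to serialize its data into it. Names here are hypothetical.
using OutStreamCallback = std::function<void(std::ostream &)>;

void postRequest(const OutStreamCallback & fill_body)
{
    std::ostringstream body;
    fill_body(body);                       // serialize the payload lazily
    std::cout << "POST body:\n" << body.str();
}

int main()
{
    std::vector<int> ids = {1, 2, 3};
    postRequest([&ids](std::ostream & ostr)
    {
        // In the dictionary source this is formatBlock() writing the block in
        // the configured format; here we just dump the ids line by line.
        for (int id : ids)
            ostr << id << '\n';
    });
}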
@@ -64,12 +64,12 @@ struct Progress
     std::atomic<size_t> written_rows {0};
     std::atomic<size_t> written_bytes {0};

-    Progress() {}
+    Progress() = default;
     Progress(size_t read_rows_, size_t read_bytes_, size_t total_rows_to_read_ = 0)
         : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {}
-    Progress(ReadProgress read_progress)
+    explicit Progress(ReadProgress read_progress)
         : read_rows(read_progress.read_rows), read_bytes(read_progress.read_bytes), total_rows_to_read(read_progress.total_rows_to_read) {}
-    Progress(WriteProgress write_progress)
+    explicit Progress(WriteProgress write_progress)
         : written_rows(write_progress.written_rows), written_bytes(write_progress.written_bytes) {}

     void read(ReadBuffer & in, UInt64 server_revision);
@@ -86,7 +86,7 @@ struct Progress
        written_rows += rhs.written_rows;
        written_bytes += rhs.written_bytes;

-       return rhs.read_rows || rhs.written_rows ? true : false;
+       return rhs.read_rows || rhs.written_rows;
    }

    void reset()
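Two small cleanups in Progress: the single-argument constructors become explicit, so a ReadProgress or WriteProgress can no longer convert to Progress silently, and the redundant `? true : false` is dropped because the || expression is already a bool. A minimal sketch of what explicit changes at a call site, with trimmed-down stand-ins for the real structs:

#include <cstddef>

struct ReadProgress { size_t read_rows = 0; };

struct Progress
{
    size_t read_rows = 0;

    Progress() = default;
    explicit Progress(ReadProgress rp) : read_rows(rp.read_rows) {}
};

void report(const Progress &) {}

int main()
{
    ReadProgress rp{42};
    report(Progress(rp));   // OK: the conversion is spelled out
    // report(rp);          // would no longer compile: implicit conversion is disallowed
}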
@@ -1014,7 +1014,7 @@ void executeQuery(
            ? getIdentifierName(ast_query_with_output->format)
            : context.getDefaultFormat();

-       auto out = context.getOutputStream(format_name, *out_buf, streams.in->getHeader());
+       auto out = context.getOutputStreamParallelIfPossible(format_name, *out_buf, streams.in->getHeader());

        /// Save previous progress callback if any. TODO Do it more conveniently.
        auto previous_progress_callback = context.getProgressCallback();
@@ -1059,7 +1059,7 @@ void executeQuery(
            return std::make_shared<MaterializingTransform>(header);
        });

-       auto out = context.getOutputFormat(format_name, *out_buf, pipeline.getHeader());
+       auto out = context.getOutputFormatParallelIfPossible(format_name, *out_buf, pipeline.getHeader());
        out->setAutoFlush();

        /// Save previous progress callback if any. TODO Do it more conveniently.
@@ -6,6 +6,7 @@
 #include <Common/ThreadPool.h>
 #include <common/logger_useful.h>
 #include <Common/Exception.h>
+#include "IO/WriteBufferFromString.h"
 #include <Formats/FormatFactory.h>
 #include <Poco/Event.h>
 #include <IO/BufferWithOwnMemory.h>
@@ -101,6 +102,15 @@ public:
        finishAndWait();
    }

+   /// There are no formats which support parallel formatting and progress writing at the same time
+   void onProgress(const Progress &) override {}
+
+   String getContentType() const override
+   {
+       WriteBufferFromOwnString buffer;
+       return internal_formatter_creator(buffer)->getContentType();
+   }
+
 protected:
    void consume(Chunk chunk) override final
    {
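The parallel wrapper does no formatting of its own, so getContentType builds a short-lived instance of the wrapped formatter over a scratch WriteBufferFromOwnString purely to ask it for its content type, and onProgress is left empty because, per the comment above, no format currently combines parallel formatting with progress output. A toy sketch of that delegation pattern, with made-up Formatter/ParallelWrapper names standing in for ClickHouse's IOutputFormat machinery:

#include <functional>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>

// Toy model of the delegation pattern; names do not match ClickHouse's real
// ParallelFormattingOutputFormat class.
struct Formatter
{
    virtual std::string getContentType() const = 0;
    virtual ~Formatter() = default;
};

struct JsonFormatter : Formatter
{
    explicit JsonFormatter(std::ostream &) {}
    std::string getContentType() const override { return "application/json"; }
};

class ParallelWrapper
{
public:
    using FormatterCreator = std::function<std::unique_ptr<Formatter>(std::ostream &)>;

    explicit ParallelWrapper(FormatterCreator creator) : internal_formatter_creator(std::move(creator)) {}

    // Build a short-lived formatter over a scratch buffer purely to query
    // metadata; the buffer's contents are thrown away.
    std::string getContentType() const
    {
        std::ostringstream scratch;
        return internal_formatter_creator(scratch)->getContentType();
    }

private:
    FormatterCreator internal_formatter_creator;
};

int main()
{
    ParallelWrapper wrapper([](std::ostream & out) { return std::make_unique<JsonFormatter>(out); });
    std::cout << wrapper.getContentType() << '\n';   // application/json
}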
@@ -183,7 +183,7 @@ public:
        : sample_block(sample_block_)
    {
        write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique<WriteBufferFromHDFS>(uri, context.getGlobalContext().getConfigRef()), compression_method, 3);
-       writer = FormatFactory::instance().getOutputStream(format, *write_buf, sample_block, context);
+       writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, *write_buf, sample_block, context);
    }

    Block getHeader() const override
@@ -155,7 +155,7 @@ namespace
    {
        write_buf = wrapWriteBufferWithCompressionMethod(
            std::make_unique<WriteBufferFromS3>(client, bucket, key, min_upload_part_size, max_single_part_upload_size), compression_method, 3);
-       writer = FormatFactory::instance().getOutputStream(format, *write_buf, sample_block, context);
+       writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, *write_buf, sample_block, context);
    }

    Block getHeader() const override
@@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]'
 # This test will fail with external poco (progress not supported)

-${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]'
+${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]'
 ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d

 # 'send_progress_in_http_headers' is false by default
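The progress test now pins output_format_parallel_formatting=0, presumably to keep the interleaving of X-ClickHouse-Progress headers and result rows deterministic; the wrapper itself also emits no per-chunk progress (its onProgress above is a no-op). A minimal sketch, under those assumptions, of why parallel formatting can still produce identical output: chunks are formatted concurrently but written strictly in submission order. This only illustrates the idea with std::async; ClickHouse's real implementation uses its own thread pool and buffering.

#include <future>
#include <iostream>
#include <string>
#include <vector>

// Order-preserving parallel formatting: each chunk is formatted on its own
// task, but results are flushed strictly in submission order.
std::string formatChunk(int chunk_id)
{
    return "chunk " + std::to_string(chunk_id) + " formatted\n";
}

int main()
{
    std::vector<std::future<std::string>> tasks;
    for (int chunk = 0; chunk < 4; ++chunk)
        tasks.push_back(std::async(std::launch::async, formatChunk, chunk));

    // Writing happens in order, regardless of which task finished first, so
    // the byte stream matches the serial one; what is lost is the chance to
    // interleave per-chunk progress between the writes.
    for (auto & task : tasks)
        std::cout << task.get();
}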