Merge pull request #21630 from nikitamikhaylov/pf-local

Parallel formatting for clickhouse-local
This commit is contained in:
Nikita Mikhaylov 2021-03-16 14:37:40 +03:00 committed by GitHub
commit 7d7a2bc724
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 23 additions and 13 deletions

View File

@ -1180,7 +1180,7 @@ try
file_in.seek(0, SEEK_SET);
BlockInputStreamPtr input = context.getInputFormat(input_format, file_in, header, max_block_size);
BlockOutputStreamPtr output = context.getOutputStream(output_format, file_out, header);
BlockOutputStreamPtr output = context.getOutputStreamParallelIfPossible(output_format, file_out, header);
if (processed_rows + source_rows > limit)
input = std::make_shared<LimitBlockInputStream>(input, limit - processed_rows, 0);

View File

@ -176,7 +176,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
std::string query = params.get("query");
LOG_TRACE(log, "Query: {}", query);
BlockOutputStreamPtr writer = FormatFactory::instance().getOutputStream(format, out, *sample_block, context);
BlockOutputStreamPtr writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, out, *sample_block, context);
auto pool = getPool(connection_string);
ODBCBlockInputStream inp(pool->get(), query, *sample_block, max_block_size);
copyData(inp, *writer);

View File

@ -136,7 +136,7 @@ BlockInputStreamPtr HTTPDictionarySource::loadIds(const std::vector<UInt64> & id
ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [block, this](std::ostream & ostr)
{
WriteBufferFromOStream out_buffer(ostr);
auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
auto output_stream = context.getOutputStreamParallelIfPossible(format, out_buffer, sample_block);
formatBlock(output_stream, block);
};
@ -157,7 +157,7 @@ BlockInputStreamPtr HTTPDictionarySource::loadKeys(const Columns & key_columns,
ReadWriteBufferFromHTTP::OutStreamCallback out_stream_callback = [block, this](std::ostream & ostr)
{
WriteBufferFromOStream out_buffer(ostr);
auto output_stream = context.getOutputStream(format, out_buffer, sample_block);
auto output_stream = context.getOutputStreamParallelIfPossible(format, out_buffer, sample_block);
formatBlock(output_stream, block);
};

View File

@ -64,12 +64,12 @@ struct Progress
std::atomic<size_t> written_rows {0};
std::atomic<size_t> written_bytes {0};
Progress() {}
Progress() = default;
Progress(size_t read_rows_, size_t read_bytes_, size_t total_rows_to_read_ = 0)
: read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {}
Progress(ReadProgress read_progress)
explicit Progress(ReadProgress read_progress)
: read_rows(read_progress.read_rows), read_bytes(read_progress.read_bytes), total_rows_to_read(read_progress.total_rows_to_read) {}
Progress(WriteProgress write_progress)
explicit Progress(WriteProgress write_progress)
: written_rows(write_progress.written_rows), written_bytes(write_progress.written_bytes) {}
void read(ReadBuffer & in, UInt64 server_revision);
@ -86,7 +86,7 @@ struct Progress
written_rows += rhs.written_rows;
written_bytes += rhs.written_bytes;
return rhs.read_rows || rhs.written_rows ? true : false;
return rhs.read_rows || rhs.written_rows;
}
void reset()

View File

@ -1014,7 +1014,7 @@ void executeQuery(
? getIdentifierName(ast_query_with_output->format)
: context.getDefaultFormat();
auto out = context.getOutputStream(format_name, *out_buf, streams.in->getHeader());
auto out = context.getOutputStreamParallelIfPossible(format_name, *out_buf, streams.in->getHeader());
/// Save previous progress callback if any. TODO Do it more conveniently.
auto previous_progress_callback = context.getProgressCallback();
@ -1059,7 +1059,7 @@ void executeQuery(
return std::make_shared<MaterializingTransform>(header);
});
auto out = context.getOutputFormat(format_name, *out_buf, pipeline.getHeader());
auto out = context.getOutputFormatParallelIfPossible(format_name, *out_buf, pipeline.getHeader());
out->setAutoFlush();
/// Save previous progress callback if any. TODO Do it more conveniently.

View File

@ -6,6 +6,7 @@
#include <Common/ThreadPool.h>
#include <common/logger_useful.h>
#include <Common/Exception.h>
#include "IO/WriteBufferFromString.h"
#include <Formats/FormatFactory.h>
#include <Poco/Event.h>
#include <IO/BufferWithOwnMemory.h>
@ -101,6 +102,15 @@ public:
finishAndWait();
}
/// There are no formats which support parallel formatting and progress writing at the same time
void onProgress(const Progress &) override {}
String getContentType() const override
{
WriteBufferFromOwnString buffer;
return internal_formatter_creator(buffer)->getContentType();
}
protected:
void consume(Chunk chunk) override final
{

View File

@ -183,7 +183,7 @@ public:
: sample_block(sample_block_)
{
write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique<WriteBufferFromHDFS>(uri, context.getGlobalContext().getConfigRef()), compression_method, 3);
writer = FormatFactory::instance().getOutputStream(format, *write_buf, sample_block, context);
writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, *write_buf, sample_block, context);
}
Block getHeader() const override

View File

@ -155,7 +155,7 @@ namespace
{
write_buf = wrapWriteBufferWithCompressionMethod(
std::make_unique<WriteBufferFromS3>(client, bucket, key, min_upload_part_size, max_single_part_upload_size), compression_method, 3);
writer = FormatFactory::instance().getOutputStream(format, *write_buf, sample_block, context);
writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, *write_buf, sample_block, context);
}
Block getHeader() const override

View File

@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]'
# This test will fail with external poco (progress not supported)
${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]'
${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]'
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d
# 'send_progress_in_http_headers' is false by default