This commit is contained in:
nikitamikhaylov 2020-11-05 21:07:14 +03:00
parent 4e85d6a4c3
commit dabb23b668
5 changed files with 15 additions and 3 deletions

View File

@ -17,7 +17,7 @@ ParallelParsingBlockInputStream::ParallelParsingBlockInputStream(const Params &
// Subtract one thread that we use for segmentation and one for // Subtract one thread that we use for segmentation and one for
// reading. After that, must have at least two threads left for // reading. After that, must have at least two threads left for
// parsing. See the assertion below. // parsing. See the assertion below.
pool(std::max(2, params.max_threads - 2)), pool(std::max(2, static_cast<int>(params.max_threads) - 2)),
file_segmentation_engine(params.file_segmentation_engine) file_segmentation_engine(params.file_segmentation_engine)
{ {
// See comment above. // See comment above.

View File

@ -69,7 +69,7 @@ public:
const InputProcessorCreator & input_processor_creator; const InputProcessorCreator & input_processor_creator;
const InputCreatorParams & input_creator_params; const InputCreatorParams & input_creator_params;
FormatFactory::FileSegmentationEngine file_segmentation_engine; FormatFactory::FileSegmentationEngine file_segmentation_engine;
int max_threads; size_t max_threads;
size_t min_chunk_bytes; size_t min_chunk_bytes;
}; };

View File

@ -166,6 +166,9 @@ BlockInputStreamPtr FormatFactory::getInput(
// (segmentator + two parsers + reader). // (segmentator + two parsers + reader).
bool parallel_parsing = settings.input_format_parallel_parsing && file_segmentation_engine && settings.max_threads >= 4; bool parallel_parsing = settings.input_format_parallel_parsing && file_segmentation_engine && settings.max_threads >= 4;
if (settings.min_chunk_bytes_for_parallel_parsing * settings.max_threads * 2 > settings.max_memory_usage)
parallel_parsing = false;
if (parallel_parsing && name == "JSONEachRow") if (parallel_parsing && name == "JSONEachRow")
{ {
/// FIXME ParallelParsingBlockInputStream doesn't support formats with non-trivial readPrefix() and readSuffix() /// FIXME ParallelParsingBlockInputStream doesn't support formats with non-trivial readPrefix() and readSuffix()
@ -195,7 +198,7 @@ BlockInputStreamPtr FormatFactory::getInput(
auto input_creator_params = ParallelParsingBlockInputStream::InputCreatorParams{sample, row_input_format_params, format_settings}; auto input_creator_params = ParallelParsingBlockInputStream::InputCreatorParams{sample, row_input_format_params, format_settings};
ParallelParsingBlockInputStream::Params params{buf, input_getter, ParallelParsingBlockInputStream::Params params{buf, input_getter,
input_creator_params, file_segmentation_engine, input_creator_params, file_segmentation_engine,
static_cast<int>(settings.max_threads), settings.max_threads,
settings.min_chunk_bytes_for_parallel_parsing}; settings.min_chunk_bytes_for_parallel_parsing};
return std::make_shared<ParallelParsingBlockInputStream>(params); return std::make_shared<ParallelParsingBlockInputStream>(params);
} }

View File

@ -0,0 +1 @@
19884108

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Test script: run clickhouse-local with parallel parsing enabled and a
# 100 MiB memory limit over a ~1 GiB TSV input, and print the row count.
# The printed count is the only expected output of this script.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Presumably defines $CLICKHOUSE_LOCAL (and related helpers) — verify in shell_config.sh.
. "$CURDIR"/../shell_config.sh

# The generated fixture is about 1 GiB; always delete it when the script exits
# (success, failure or interruption) so repeated runs do not fill the disk.
# A trap is used instead of a trailing `rm` so that the script's exit status
# remains that of the clickhouse-local invocation.
# NOTE(review): this replaces any EXIT trap installed by shell_config.sh — confirm none exists.
trap 'rm -f 1g.csv' EXIT

# Build the input: the same URL repeated one per line, truncated at exactly 1 GiB.
yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > 1g.csv

# $CLICKHOUSE_LOCAL is intentionally left unquoted: it may expand to a command plus arguments.
$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('1g.csv', 'TSV', 'URL String')"