This commit is contained in:
nikitamikhaylov 2020-11-05 21:07:14 +03:00
parent 4e85d6a4c3
commit dabb23b668
5 changed files with 15 additions and 3 deletions

View File

@ -17,7 +17,7 @@ ParallelParsingBlockInputStream::ParallelParsingBlockInputStream(const Params &
// Subtract one thread that we use for segmentation and one for
// reading. After that, must have at least two threads left for
// parsing. See the assertion below.
pool(std::max(2, params.max_threads - 2)),
pool(std::max(2, static_cast<int>(params.max_threads) - 2)),
file_segmentation_engine(params.file_segmentation_engine)
{
// See comment above.

View File

@ -69,7 +69,7 @@ public:
const InputProcessorCreator & input_processor_creator;
const InputCreatorParams & input_creator_params;
FormatFactory::FileSegmentationEngine file_segmentation_engine;
int max_threads;
size_t max_threads;
size_t min_chunk_bytes;
};

View File

@ -166,6 +166,9 @@ BlockInputStreamPtr FormatFactory::getInput(
// (segmentator + two parsers + reader).
bool parallel_parsing = settings.input_format_parallel_parsing && file_segmentation_engine && settings.max_threads >= 4;
if (settings.min_chunk_bytes_for_parallel_parsing * settings.max_threads * 2 > settings.max_memory_usage)
parallel_parsing = false;
if (parallel_parsing && name == "JSONEachRow")
{
/// FIXME ParallelParsingBlockInputStream doesn't support formats with non-trivial readPrefix() and readSuffix()
@ -195,7 +198,7 @@ BlockInputStreamPtr FormatFactory::getInput(
auto input_creator_params = ParallelParsingBlockInputStream::InputCreatorParams{sample, row_input_format_params, format_settings};
ParallelParsingBlockInputStream::Params params{buf, input_getter,
input_creator_params, file_segmentation_engine,
static_cast<int>(settings.max_threads),
settings.max_threads,
settings.min_chunk_bytes_for_parallel_parsing};
return std::make_shared<ParallelParsingBlockInputStream>(params);
}

View File

@ -0,0 +1 @@
19884108

View File

@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Runs clickhouse-local with parallel parsing enabled and a 100Mi memory limit
# over a generated 1 GiB single-column file, and prints the row count.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Generate 1 GiB of identical URL lines; `head -c1G` truncates the endless
# output of `yes` at exactly 1 GiB, so the last line may be partial.
yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > 1g.csv

# $CLICKHOUSE_LOCAL is intentionally left unquoted: it comes from
# shell_config.sh and presumably may expand to a command plus arguments.
$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('1g.csv', 'TSV', 'URL String')"
query_status=$?

# Do not leave the 1 GiB input behind in the working directory, and report
# the query's exit status rather than the status of the cleanup.
rm -f 1g.csv
exit "$query_status"