mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
docs
This commit is contained in:
parent
60ee52cb8d
commit
980528ed75
@ -108,9 +108,9 @@ struct Settings : public SettingsCollection<Settings>
|
||||
M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.") \
|
||||
M(SettingBool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.") \
|
||||
\
|
||||
M(SettingBool, input_format_parallel_parsing, true, "Enable parallel parsing for several data formats (JSON, TSV, TKSV, Values, CSV).") \
|
||||
M(SettingUInt64, max_threads_for_parallel_reading, 10, "The maximum number of threads to parallel reading. By default, it is set to max_threads.") \
|
||||
M(SettingUInt64, min_chunk_size_for_parallel_reading, (1024 * 1024), "The minimum chunk size in bytes, which each thread tries to parse under mutex in parallel reading.") \
|
||||
M(SettingBool, input_format_parallel_parsing, true, "Enable parallel parsing for several data formats (JSONEachRow, TSV, TKSV, CSV).") \
|
||||
M(SettingUInt64, max_threads_for_parallel_parsing, 10, "The maximum number of threads to parallel parsing.") \
|
||||
M(SettingUInt64, min_chunk_size_for_parallel_parsing, (1024 * 1024), "The minimum chunk size in bytes, which each thread will parse in parallel.") \
|
||||
\
|
||||
M(SettingUInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized.") \
|
||||
M(SettingUInt64, merge_tree_min_bytes_for_concurrent_read, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized.") \
|
||||
|
@ -16,8 +16,8 @@ namespace DB
|
||||
/**
|
||||
* ORDER-PRESERVING parallel parsing of data formats.
|
||||
* It splits original data into chunks. Then each chunk is parsed by different thread.
|
||||
* The number of chunks equals to max_threads_for_parallel_reading setting.
|
||||
* The size of chunk is equal to min_chunk_size_for_parallel_reading setting.
|
||||
* The number of chunks is equal to the max_threads_for_parallel_parsing setting.
|
||||
* The size of a chunk is equal to the min_chunk_size_for_parallel_parsing setting.
|
||||
*/
|
||||
class ParallelParsingBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
|
@ -124,12 +124,12 @@ BlockInputStreamPtr FormatFactory::getInput(
|
||||
row_input_format_params.max_execution_time = settings.max_execution_time;
|
||||
row_input_format_params.timeout_overflow_mode = settings.timeout_overflow_mode;
|
||||
|
||||
size_t max_threads_to_use = settings.max_threads_for_parallel_reading;
|
||||
size_t max_threads_to_use = settings.max_threads_for_parallel_parsing;
|
||||
if (!max_threads_to_use)
|
||||
max_threads_to_use = settings.max_threads;
|
||||
|
||||
auto params = ParallelParsingBlockInputStream::InputCreatorParams{sample, context, row_input_format_params, format_settings};
|
||||
ParallelParsingBlockInputStream::Builder builder{buf, input_getter, params, file_segmentation_engine, max_threads_to_use, settings.min_chunk_size_for_parallel_reading};
|
||||
ParallelParsingBlockInputStream::Builder builder{buf, input_getter, params, file_segmentation_engine, max_threads_to_use, settings.min_chunk_size_for_parallel_parsing};
|
||||
return std::make_shared<ParallelParsingBlockInputStream>(builder);
|
||||
}
|
||||
|
||||
|
@ -980,4 +980,25 @@ Lower values mean higher priority. Threads with low `nice` priority values are e
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## input_format_parallel_parsing
|
||||
|
||||
- Type: bool
|
||||
- Default value: True
|
||||
|
||||
Enable order-preserving parallel parsing of data formats such as JSONEachRow, TSV, TSKV and CSV. Reading will be single-threaded and parsing will be multi-threaded.
|
||||
|
||||
## max_threads_for_parallel_parsing
|
||||
|
||||
- Type: unsigned int
|
||||
- Default value: 10
|
||||
|
||||
The maximum number of threads for order-preserving parallel parsing of data formats.
|
||||
|
||||
## min_chunk_size_for_parallel_parsing
|
||||
|
||||
- Type: unsigned int
|
||||
- Default value: 1024 * 1024
|
||||
|
||||
The minimum chunk size, in bytes, that each thread will parse in parallel. By default, it is equal to one megabyte.
|
||||
|
||||
[Original article](https://clickhouse.yandex/docs/en/operations/settings/settings/) <!-- hide -->
|
||||
|
Loading…
Reference in New Issue
Block a user