This commit is contained in:
Nikita Taranov 2024-09-19 15:54:45 +01:00
parent b4504f20bf
commit d6d55ca3ef
3 changed files with 12 additions and 6 deletions

View File

@@ -784,12 +784,13 @@ namespace ErrorCodes
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
M(Bool, remote_filesystem_read_prefetch, true, "Should use prefetching when reading data from remote filesystem.", 0) \
M(Int64, read_priority, 0, "Priority to read data from local filesystem or remote filesystem. Only supported for 'pread_threadpool' method for local filesystem and for `threadpool` method for remote filesystem.", 0) \
M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \
M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \
M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, 0, "Setting is deprecated.", 0) \
M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, 0, "Setting is deprecated.", 0) \
M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \
M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 2 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) ALIAS(filesystem_prefetch_min_bytes_for_single_read_task) \
M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \
M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, "Whether to use only prewhere columns size to determine reading task size.", 0) \
M(UInt64, merge_tree_min_read_task_size, 1, "Hard lower limit on the task size (even when the number of granules is low and the number of available threads is high we won't allocate smaller tasks) (I HOPE TO REMOVE IT AFTER TESTING)", 0) \
M(UInt64, merge_tree_compact_parts_min_granules_to_multibuffer_read, 16, "Only available in ClickHouse Cloud", 0) \
\
M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \

View File

@@ -85,7 +85,10 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"},
{"join_to_sort_minimum_perkey_rows", 0, 40, "The lower limit of per-key average rows in the right table to determine whether to rerange the right table by key in left or inner join. This setting ensures that the optimization is not applied for sparse table keys"},
{"join_to_sort_maximum_table_rows", 0, 10000, "The maximum number of rows in the right table to determine whether to rerange the right table by key in left or inner join"},
{"allow_experimental_join_right_table_sorting", false, false, "If it is set to true, and the conditions of `join_to_sort_minimum_perkey_rows` and `join_to_sort_maximum_table_rows` are met, rerange the right table by key to improve the performance in left or inner hash join"}
{"allow_experimental_join_right_table_sorting", false, false, "If it is set to true, and the conditions of `join_to_sort_minimum_perkey_rows` and `join_to_sort_maximum_table_rows` are met, rerange the right table by key to improve the performance in left or inner hash join"},
{"merge_tree_min_read_task_size", 1, 1, "New setting"},
{"merge_tree_min_rows_for_concurrent_read_for_remote_filesystem", (20 * 8192), 0, "Setting is deprecated"},
{"merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem", (24 * 10 * 1024 * 1024), 0, "Setting is deprecated"},
}
},
{"24.8",

View File

@@ -10,6 +10,7 @@ namespace Setting
{
extern const SettingsBool merge_tree_determine_task_size_by_prewhere_columns;
extern const SettingsUInt64 merge_tree_min_bytes_per_task_for_remote_reading;
extern const SettingsUInt64 merge_tree_min_read_task_size;
}
namespace ErrorCodes
@@ -62,7 +63,8 @@ static size_t calculateMinMarksPerTask(
const MergeTreeReadPoolBase::PoolSettings & pool_settings,
const Settings & settings)
{
size_t min_marks_per_task = pool_settings.min_marks_for_concurrent_read;
size_t min_marks_per_task
= std::max<size_t>(settings[Setting::merge_tree_min_read_task_size], pool_settings.min_marks_for_concurrent_read);
const size_t part_marks_count = part.getMarksCount();
if (part_marks_count && part.data_part->isStoredOnRemoteDisk())
{
@@ -82,7 +84,7 @@ static size_t calculateMinMarksPerTask(
= std::min<size_t>(pool_settings.sum_marks / pool_settings.threads / 2, min_bytes_per_task / avg_mark_bytes);
if (heuristic_min_marks > min_marks_per_task)
{
LOG_TEST(
LOG_TRACE(
&Poco::Logger::get("MergeTreeReadPoolBase"),
"Increasing min_marks_per_task from {} to {} based on columns size heuristic",
min_marks_per_task,
@@ -91,7 +93,7 @@ static size_t calculateMinMarksPerTask(
}
}
LOG_TEST(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task);
LOG_TRACE(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task);
return min_marks_per_task;
}