mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
optimize_sorting_for_input_stream setting and perf tests
This commit is contained in:
parent
828f3711d2
commit
1fe83cc8d8
@ -613,6 +613,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
M(Bool, compatibility_ignore_auto_increment_in_create_table, false, "Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL", 0) \
|
||||
M(Bool, multiple_joins_try_to_keep_original_names, false, "Do not add aliases to top level expression list on multiple joins rewrite", 0) \
|
||||
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
|
||||
M(Bool, optimize_sorting_for_input_stream, true, "Optimize sorting to sorting properties of input stream", 0) \
|
||||
// End of COMMON_SETTINGS
|
||||
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
|
||||
|
||||
|
@ -1367,7 +1367,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
settings.remerge_sort_lowered_memory_bytes_ratio,
|
||||
settings.max_bytes_before_external_sort,
|
||||
this->context->getTemporaryVolume(),
|
||||
settings.min_free_disk_space_for_temporary_data);
|
||||
settings.min_free_disk_space_for_temporary_data,
|
||||
settings.optimize_sorting_for_input_stream);
|
||||
sorting_step->setStepDescription(fmt::format("Sort {} before JOIN", is_right ? "right" : "left"));
|
||||
plan.addStep(std::move(sorting_step));
|
||||
};
|
||||
@ -2497,7 +2498,8 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
|
||||
settings.remerge_sort_lowered_memory_bytes_ratio,
|
||||
settings.max_bytes_before_external_sort,
|
||||
context->getTemporaryVolume(),
|
||||
settings.min_free_disk_space_for_temporary_data);
|
||||
settings.min_free_disk_space_for_temporary_data,
|
||||
settings.optimize_sorting_for_input_stream);
|
||||
sorting_step->setStepDescription("Sorting for window '" + window.window_name + "'");
|
||||
query_plan.addStep(std::move(sorting_step));
|
||||
}
|
||||
@ -2555,7 +2557,8 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo
|
||||
settings.remerge_sort_lowered_memory_bytes_ratio,
|
||||
settings.max_bytes_before_external_sort,
|
||||
context->getTemporaryVolume(),
|
||||
settings.min_free_disk_space_for_temporary_data);
|
||||
settings.min_free_disk_space_for_temporary_data,
|
||||
settings.optimize_sorting_for_input_stream);
|
||||
|
||||
sorting_step->setStepDescription("Sorting for ORDER BY");
|
||||
query_plan.addStep(std::move(sorting_step));
|
||||
|
@ -38,7 +38,8 @@ SortingStep::SortingStep(
|
||||
double remerge_lowered_memory_bytes_ratio_,
|
||||
size_t max_bytes_before_external_sort_,
|
||||
VolumePtr tmp_volume_,
|
||||
size_t min_free_disk_space_)
|
||||
size_t min_free_disk_space_,
|
||||
bool optimize_sorting_for_input_stream_)
|
||||
: ITransformingStep(input_stream, input_stream.header, getTraits(limit_))
|
||||
, type(Type::Auto)
|
||||
, result_description(description_)
|
||||
@ -50,6 +51,7 @@ SortingStep::SortingStep(
|
||||
, max_bytes_before_external_sort(max_bytes_before_external_sort_)
|
||||
, tmp_volume(tmp_volume_)
|
||||
, min_free_disk_space(min_free_disk_space_)
|
||||
, optimize_sorting_for_input_stream(optimize_sorting_for_input_stream_)
|
||||
{
|
||||
/// TODO: check input_stream is partially sorted by the same description.
|
||||
output_stream->sort_description = result_description;
|
||||
@ -249,15 +251,18 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
|
||||
LOG_DEBUG(getLogger(), "Prefix({}): {}", prefix_description.size(), dumpSortDescription(prefix_description));
|
||||
LOG_DEBUG(getLogger(), "Result({}): {}", result_description.size(), dumpSortDescription(result_description));
|
||||
|
||||
if (input_sort_mode == DataStream::SortMode::Stream && input_sort_desc.hasPrefix(result_description))
|
||||
return;
|
||||
|
||||
/// merge sorted
|
||||
if (input_sort_mode == DataStream::SortMode::Port && input_sort_desc.hasPrefix(result_description))
|
||||
if (optimize_sorting_for_input_stream)
|
||||
{
|
||||
LOG_DEBUG(getLogger(), "MergingSorted, SortMode::Port");
|
||||
mergingSorted(pipeline, result_description, limit);
|
||||
return;
|
||||
if (input_sort_mode == DataStream::SortMode::Stream && input_sort_desc.hasPrefix(result_description))
|
||||
return;
|
||||
|
||||
/// merge sorted
|
||||
if (input_sort_mode == DataStream::SortMode::Port && input_sort_desc.hasPrefix(result_description))
|
||||
{
|
||||
LOG_DEBUG(getLogger(), "MergingSorted, SortMode::Port");
|
||||
mergingSorted(pipeline, result_description, limit);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (type == Type::MergingSorted)
|
||||
@ -277,7 +282,7 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
|
||||
return;
|
||||
}
|
||||
|
||||
if (input_sort_mode == DataStream::SortMode::Chunk)
|
||||
if (optimize_sorting_for_input_stream && input_sort_mode == DataStream::SortMode::Chunk)
|
||||
{
|
||||
if (input_sort_desc.hasPrefix(result_description))
|
||||
{
|
||||
@ -285,14 +290,6 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
|
||||
fullSort(pipeline, result_description, limit, true);
|
||||
return;
|
||||
}
|
||||
if (result_description.hasPrefix(input_sort_desc))
|
||||
{
|
||||
LOG_DEBUG(getLogger(), "FinishSorting, SortMode::Chunk");
|
||||
mergeSorting(pipeline, input_sort_desc, 0);
|
||||
mergingSorted(pipeline, input_sort_desc, 0);
|
||||
finishSorting(pipeline, input_sort_desc, result_description, limit);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DEBUG(getLogger(), "FullSort");
|
||||
|
@ -22,7 +22,8 @@ public:
|
||||
double remerge_lowered_memory_bytes_ratio_,
|
||||
size_t max_bytes_before_external_sort_,
|
||||
VolumePtr tmp_volume_,
|
||||
size_t min_free_disk_space_);
|
||||
size_t min_free_disk_space_,
|
||||
bool optimize_sorting_for_input_stream_);
|
||||
|
||||
/// FinishSorting
|
||||
SortingStep(
|
||||
@ -86,6 +87,7 @@ private:
|
||||
size_t max_bytes_before_external_sort = 0;
|
||||
VolumePtr tmp_volume;
|
||||
size_t min_free_disk_space = 0;
|
||||
const bool optimize_sorting_for_input_stream = false;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -3,8 +3,6 @@
|
||||
|
||||
<!-- FIXME this should have been an EXPLAIN test, no point in measuring performance to deduce that the query was rewritten -->
|
||||
<query>SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY EventDate, CounterID FORMAT Null</query>
|
||||
<query>SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID, EventDate) ORDER BY CounterID FORMAT Null</query>
|
||||
<query>SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID) ORDER BY CounterID, EventDate FORMAT Null</query>
|
||||
<query>SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single) FORMAT Null</query>
|
||||
<query>SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY toStartOfWeek(EventDate) FORMAT Null</query>
|
||||
</test>
|
||||
|
@ -1,4 +1,15 @@
|
||||
<test>
|
||||
<settings><optimize_sorting_for_input_stream>1</optimize_sorting_for_input_stream></settings>
|
||||
|
||||
<!-- ORDER BY key is prefix of MergeTree sorting key -->
|
||||
<query>SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID SETTINGS optimize_read_in_order=1 FORMAT Null</query>
|
||||
<query>SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID SETTINGS optimize_read_in_order=0 FORMAT Null</query>
|
||||
|
||||
<!-- MergeTree sorting key is prefix of ORDER BY key -->
|
||||
<query>SELECT CounterID, EventTime FROM hits_10m_single ORDER BY CounterID, EventTime SETTINGS optimize_read_in_order=1 format Null</query>
|
||||
<query>SELECT CounterID, EventTime FROM hits_10m_single ORDER BY CounterID, EventTime SETTINGS optimize_read_in_order=0 format Null</query>
|
||||
|
||||
<!-- sorting step getting sort description from subquery -->
|
||||
<query>SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single) ORDER BY CounterID SETTINGS optimize_read_in_order=1 FORMAT Null</query>
|
||||
<query>SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single) ORDER BY CounterID SETTINGS optimize_read_in_order=0 FORMAT Null</query>
|
||||
<query>SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID, EventDate) ORDER BY CounterID SETTINGS optimize_duplicate_order_by_and_distinct=1 FORMAT Null</query>
|
Loading…
Reference in New Issue
Block a user