more fixes

This commit is contained in:
Nickita Taranov 2022-03-29 21:16:05 +02:00
parent 5590f78dfe
commit ce40d84eef
5 changed files with 35 additions and 29 deletions

View File

@ -2,35 +2,48 @@
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/SortingStep.h>
#include <Common/Exception.h>
#include <base/logger_useful.h>
#include <Poco/Logger.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
}
namespace DB::QueryPlanOptimizations
namespace
{
// Rearranges a "Sorting (parent_node) -> Expression (first child)" pair of plan nodes so that
// the sorting step ends up in the child node and a new ExpressionStep built from
// `unneeded_for_sorting` ends up in the parent node.
//
// parent_node          - node whose step is cast to SortingStep; its first child is the other
//                        node taking part in the swap.
// unneeded_for_sorting - actions DAG moved into the newly created ExpressionStep.
//
// Throws LOGICAL_ERROR when the child node does not have exactly one child.
//
// NOTE(review): several lines below occur twice (an unqualified form immediately followed by a
// DB::-qualified or .front() form). This looks like the removed and added sides of a diff hunk
// shown without +/- markers - confirm against the raw patch before treating this as compilable code.
void swapSortingAndUnneededCalculations(QueryPlan::Node * parent_node, ActionsDAGPtr && unneeded_for_sorting)
void swapSortingAndUnneededCalculations(DB::QueryPlan::Node * parent_node, DB::ActionsDAGPtr && unneeded_for_sorting)
{
QueryPlan::Node * child_node = parent_node->children.front();
DB::QueryPlan::Node * child_node = parent_node->children.front();
auto & parent_step = parent_node->step;
auto & child_step = child_node->step;
// The raw pointer is taken before the swap, so after the swap it refers to the step that is held by child_node.
// NOTE(review): the cast result is dereferenced below without a null check - presumably the
// caller has already verified that the parent step is a SortingStep; confirm.
auto * sorting_step = typeid_cast<SortingStep *>(parent_step.get());
auto * sorting_step = typeid_cast<DB::SortingStep *>(parent_step.get());
// Sorting -> Expression
std::swap(parent_step, child_step);
// Expression -> Sorting
sorting_step->updateInputStream(child_node->children.at(0)->step->getOutputStream());
auto input_header = sorting_step->getInputStreams().at(0).header;
if (child_node->children.size() != 1)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "SortingStep is expected to have only one input stream.");
// The sorting step now reads from the output stream of child_node's only child.
sorting_step->updateInputStream(child_node->children.front()->step->getOutputStream());
// The input header is passed on unchanged as the sorting step's output header.
auto input_header = sorting_step->getInputStreams().front().header;
sorting_step->updateOutputStream(std::move(input_header));
// After the swap parent_node->step is the step that used to belong to child_node; its
// description is reused (with a suffix) for the replacement step created below.
auto description = parent_node->step->getStepDescription();
parent_step = std::make_unique<ExpressionStep>(child_step->getOutputStream(), std::move(unneeded_for_sorting));
parent_step = std::make_unique<DB::ExpressionStep>(child_step->getOutputStream(), std::move(unneeded_for_sorting));
parent_step->setStepDescription(description + " [lifted up part]");
// UnneededCalculations -> Sorting
}
}
namespace DB::QueryPlanOptimizations
{
size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
{
if (parent_node->children.size() != 1)
@ -55,12 +68,15 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan:
if (unneeded_for_sorting->trivial())
return 0;
if (child_node->children.size() != 1)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "ExpressionStep is expected to have only one input stream.");
// Sorting (parent_node) -> Expression (child_node)
auto & node_with_needed = nodes.emplace_back();
std::swap(node_with_needed.children, child_node->children);
child_node->children = {&node_with_needed};
node_with_needed.step
= std::make_unique<ExpressionStep>(node_with_needed.children.at(0)->step->getOutputStream(), std::move(needed_for_sorting));
= std::make_unique<ExpressionStep>(node_with_needed.children.front()->step->getOutputStream(), std::move(needed_for_sorting));
node_with_needed.step->setStepDescription(child_step->getStepDescription());
// Sorting (parent_node) -> so far the original Expression (child_node) -> NeededCalculations (node_with_needed)

View File

@ -92,7 +92,7 @@ SortingStep::SortingStep(
// Discards the currently stored input streams and stores `input_stream` in their place.
// NOTE(review): the push_back and emplace_back lines below look like the removed and added
// sides of a diff hunk shown without +/- markers; if both were really executed, the second
// call would append an already moved-from stream. Confirm against the raw patch.
void SortingStep::updateInputStream(DataStream input_stream)
{
input_streams.clear();
input_streams.push_back(std::move(input_stream));
input_streams.emplace_back(std::move(input_stream));
}
void SortingStep::updateOutputStream(Block result_header)

View File

@ -10,8 +10,8 @@ set max_block_size=40960;
-- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption
-- MergeSortingTransform: Memory usage is lowered from 186.25 MiB to 95.00 MiB
-- MergeSortingTransform: Re-merging is not useful (memory usage was not lowered by remerge_sort_lowered_memory_bytes_ratio=2.0)
select repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 format Null; -- { serverError 241 }
select repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 }
select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 format Null; -- { serverError 241 }
select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by v1, v2 limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 }
-- remerge_sort_lowered_memory_bytes_ratio 1.9 is good (need at least 1.91/0.98=1.94)
-- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption

View File

@ -143,17 +143,11 @@ Filter
2 3
2 3
> function calculation should be done after sorting and limit (if possible)
> the whole Expression node could be moved after Sorting
Expression
Limit
Expression
Sorting
Expression
> Expression should be divided into two subexpressions and only one of them should be moved after Sorting
Expression
Limit
Expression
Expression (Before ORDER BY [lifted up part])
FUNCTION sipHash64
Sorting
Expression
Expression (Before ORDER BY)
FUNCTION plus
> this query should be executed without throwing an exception
0

View File

@ -198,14 +198,10 @@ $CLICKHOUSE_CLIENT -q "
) where a != 1 settings enable_optimize_predicate_expression = 0"
echo "> function calculation should be done after sorting and limit (if possible)"
echo "> the whole Expression node could be moved after Sorting"
$CLICKHOUSE_CLIENT -q "
explain select sipHash64(number) from numbers(100) order by number limit 5" |
sed 's/ //g' | grep -o "^ *\(Expression\|Limit\|Sorting\)"
echo "> Expression should be divided into two subexpressions and only one of them should be moved after Sorting"
$CLICKHOUSE_CLIENT -q "
explain select sipHash64(number) from numbers(100) order by number + 1 limit 5" |
sed 's/ //g' | grep -o "^ *\(Expression\|Limit\|Sorting\)"
explain actions = 1 select number as n, sipHash64(n) from numbers(100) order by number + 1 limit 5" |
sed 's/^ *//g' | grep -o "^ *\(Expression (Before ORDER BY.*)\|Sorting\|FUNCTION \w\+\)"
echo "> this query should be executed without throwing an exception"
$CLICKHOUSE_CLIENT -q "
select throwIf(number = 5) from (select * from numbers(10)) order by number limit 1"