Consider aliases when checking if sorting order is preserved by expression
This commit is contained in:
Igor Nikonov 2022-08-09 11:27:17 +00:00
parent 711bb02cdf
commit 366ead3828
5 changed files with 58 additions and 8 deletions

View File

@ -1927,7 +1927,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
return actions;
}
bool ActionsDAG::isSortingPreserved(const Block & header, const SortDescription & sort_description) const
bool ActionsDAG::isSortingPreserved(const Block & input_header, const SortDescription & sort_description) const
{
if (sort_description.empty())
return true;
@ -1935,12 +1935,40 @@ bool ActionsDAG::isSortingPreserved(const Block & header, const SortDescription
if (hasArrayJoin())
return false;
const Block & output_header = updateHeader(header);
/// build reversed aliases, use it later to find out if some aliases refer to sorted columns
std::unordered_map<std::string_view, const String &> reversed_aliases;
for (const Node * node : index)
{
if (node->type == ActionType::ALIAS)
{
reversed_aliases.emplace(node->children.front()->result_name, node->result_name);
}
}
const Block & output_header = updateHeader(input_header);
for (const auto & desc : sort_description)
{
/// check if column is part of output header
/// if not, check if an alias in the output header refers to the column
if (!output_header.findByName(desc.column_name))
return false;
{
const auto it = reversed_aliases.find(desc.column_name);
if (it == reversed_aliases.end())
return false;
/// check if alias to sorted column is in output header
if (!output_header.findByName(it->second))
return false;
}
/// check that the found column is not an alias
for (const Node * node : index)
{
if (node->type == ActionType::ALIAS && node->result_name == desc.column_name)
return false;
}
}
return true;
}

View File

@ -303,7 +303,7 @@ public:
const Names & available_inputs,
const ColumnsWithTypeAndName & all_inputs);
bool isSortingPreserved(const Block & header, const SortDescription & sort_description) const;
bool isSortingPreserved(const Block & input_header, const SortDescription & sort_description) const;
private:
Node & addNode(Node node);

View File

@ -14,10 +14,10 @@ static ITransformingStep::Traits getTraits(const ActionsDAGPtr & expression, con
bool preserves_sorting = expression->isSortingPreserved(header, sort_description);
if (remove_filter_column)
{
preserves_sorting = find_if(
begin(sort_description),
end(sort_description),
[&](const auto & column_desc) { return column_desc.column_name == filter_column_name; })
preserves_sorting &= find_if(
begin(sort_description),
end(sort_description),
[&](const auto & column_desc) { return column_desc.column_name == filter_column_name; })
== sort_description.end();
}
return ITransformingStep::Traits

View File

@ -24,3 +24,15 @@ Sort Mode: Chunk: a ASC
Sort Mode: None
Sort Mode: None
Sort Mode: Chunk: a ASC
-- PLAN: aliases break sorting order
Sort Mode: Stream: a ASC
Sort Mode: Stream: a ASC
Sort Mode: None
Sort Mode: Stream: a ASC
Sort Mode: Port: a ASC
Sort Mode: Port: a ASC
-- PLAN: aliases DONT break sorting order
Sort Mode: Stream: x ASC, y ASC
Sort Mode: Stream: x ASC, y ASC
Sort Mode: Chunk: a ASC, b ASC
Sort Mode: Chunk: a ASC, b ASC

View File

@ -37,3 +37,13 @@ $CLICKHOUSE_CLIENT -nq "EXPLAIN PLAN sortmode=1 SELECT a FROM optimize_sorting W
$CLICKHOUSE_CLIENT -q "select '-- PLAN: FilterStep breaks sort mode'"
$CLICKHOUSE_CLIENT -nq "EXPLAIN PLAN sortmode=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0" | eval $FIND_SORTMODE
$CLICKHOUSE_CLIENT -q "select '-- PLAN: aliases break sorting order'"
$CLICKHOUSE_CLIENT -nq "EXPLAIN PLAN sortmode=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a" | eval $FIND_SORTMODE
# FIXME: we still do a full sort here because, for the innermost subquery, the sorting description contains the original column names but the header contains only aliases of those columns:
#| Header: x Int32 │
#│ y Int32 │
#│ Sort Mode: Chunk: a ASC, b ASC │
$CLICKHOUSE_CLIENT -q "select '-- PLAN: aliases DONT break sorting order'"
$CLICKHOUSE_CLIENT -nq "EXPLAIN PLAN sortmode=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y)" | eval $FIND_SORTMODE