Consider aliases when checking if sorting order is preserved by

expression
This commit is contained in:
Igor Nikonov 2022-08-09 11:27:17 +00:00
parent 711bb02cdf
commit 366ead3828
5 changed files with 58 additions and 8 deletions

View File

@ -1927,7 +1927,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
return actions; return actions;
} }
bool ActionsDAG::isSortingPreserved(const Block & header, const SortDescription & sort_description) const bool ActionsDAG::isSortingPreserved(const Block & input_header, const SortDescription & sort_description) const
{ {
if (sort_description.empty()) if (sort_description.empty())
return true; return true;
@ -1935,12 +1935,40 @@ bool ActionsDAG::isSortingPreserved(const Block & header, const SortDescription
if (hasArrayJoin()) if (hasArrayJoin())
return false; return false;
const Block & output_header = updateHeader(header); /// build reversed aliases, use it later to find out if some aliases refer to sorted columns
std::unordered_map<std::string_view, const String &> reversed_aliases;
for (const Node * node : index)
{
if (node->type == ActionType::ALIAS)
{
reversed_aliases.emplace(node->children.front()->result_name, node->result_name);
}
}
const Block & output_header = updateHeader(input_header);
for (const auto & desc : sort_description) for (const auto & desc : sort_description)
{ {
/// check if column is part of output header
/// if not, check if an alias in the output header refers to the column
if (!output_header.findByName(desc.column_name)) if (!output_header.findByName(desc.column_name))
{
const auto it = reversed_aliases.find(desc.column_name);
if (it == reversed_aliases.end())
return false;
/// check if alias to sorted column is in output header
if (!output_header.findByName(it->second))
return false; return false;
} }
/// check that the found column is not an alias
for (const Node * node : index)
{
if (node->type == ActionType::ALIAS && node->result_name == desc.column_name)
return false;
}
}
return true; return true;
} }

View File

@ -303,7 +303,7 @@ public:
const Names & available_inputs, const Names & available_inputs,
const ColumnsWithTypeAndName & all_inputs); const ColumnsWithTypeAndName & all_inputs);
bool isSortingPreserved(const Block & header, const SortDescription & sort_description) const; bool isSortingPreserved(const Block & input_header, const SortDescription & sort_description) const;
private: private:
Node & addNode(Node node); Node & addNode(Node node);

View File

@ -14,7 +14,7 @@ static ITransformingStep::Traits getTraits(const ActionsDAGPtr & expression, con
bool preserves_sorting = expression->isSortingPreserved(header, sort_description); bool preserves_sorting = expression->isSortingPreserved(header, sort_description);
if (remove_filter_column) if (remove_filter_column)
{ {
preserves_sorting = find_if( preserves_sorting &= find_if(
begin(sort_description), begin(sort_description),
end(sort_description), end(sort_description),
[&](const auto & column_desc) { return column_desc.column_name == filter_column_name; }) [&](const auto & column_desc) { return column_desc.column_name == filter_column_name; })

View File

@ -24,3 +24,15 @@ Sort Mode: Chunk: a ASC
Sort Mode: None Sort Mode: None
Sort Mode: None Sort Mode: None
Sort Mode: Chunk: a ASC Sort Mode: Chunk: a ASC
-- PLAN: aliases break sorting order
Sort Mode: Stream: a ASC
Sort Mode: Stream: a ASC
Sort Mode: None
Sort Mode: Stream: a ASC
Sort Mode: Port: a ASC
Sort Mode: Port: a ASC
-- PLAN: aliases DONT break sorting order
Sort Mode: Stream: x ASC, y ASC
Sort Mode: Stream: x ASC, y ASC
Sort Mode: Chunk: a ASC, b ASC
Sort Mode: Chunk: a ASC, b ASC

View File

@ -37,3 +37,13 @@ $CLICKHOUSE_CLIENT -nq "EXPLAIN PLAN sortmode=1 SELECT a FROM optimize_sorting W
$CLICKHOUSE_CLIENT -q "select '-- PLAN: FilterStep breaks sort mode'" $CLICKHOUSE_CLIENT -q "select '-- PLAN: FilterStep breaks sort mode'"
$CLICKHOUSE_CLIENT -nq "EXPLAIN PLAN sortmode=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0" | eval $FIND_SORTMODE $CLICKHOUSE_CLIENT -nq "EXPLAIN PLAN sortmode=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0" | eval $FIND_SORTMODE
$CLICKHOUSE_CLIENT -q "select '-- PLAN: aliases break sorting order'"
$CLICKHOUSE_CLIENT -nq "EXPLAIN PLAN sortmode=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a" | eval $FIND_SORTMODE
# FIXME: we still do a full sort here because, for the innermost subquery, the sorting description contains the original column names but the header contains only aliases of those columns:
#| Header: x Int32 │
#│ y Int32 │
#│ Sort Mode: Chunk: a ASC, b ASC │
$CLICKHOUSE_CLIENT -q "select '-- PLAN: aliases DONT break sorting order'"
$CLICKHOUSE_CLIENT -nq "EXPLAIN PLAN sortmode=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y)" | eval $FIND_SORTMODE