Merge pull request #62346 from ClickHouse/vdimir/analyzer_comapre_columns

Analyzer: Fix validateAggregates for tables with different aliases
This commit is contained in:
vdimir 2024-05-17 20:00:04 +00:00 committed by GitHub
commit e5ad196317
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 83 additions and 1 deletions

View File

@ -138,6 +138,52 @@ public:
}
private:
static bool areColumnSourcesEqual(const QueryTreeNodePtr & lhs, const QueryTreeNodePtr & rhs)
{
using NodePair = std::pair<const IQueryTreeNode *, const IQueryTreeNode *>;
std::vector<NodePair> nodes_to_process;
nodes_to_process.emplace_back(lhs.get(), rhs.get());
while (!nodes_to_process.empty())
{
const auto [lhs_node, rhs_node] = nodes_to_process.back();
nodes_to_process.pop_back();
if (lhs_node->getNodeType() != rhs_node->getNodeType())
return false;
if (lhs_node->getNodeType() == QueryTreeNodeType::COLUMN)
{
const auto * lhs_column_node = lhs_node->as<ColumnNode>();
const auto * rhs_column_node = rhs_node->as<ColumnNode>();
if (!lhs_column_node->getColumnSource()->isEqual(*rhs_column_node->getColumnSource()))
return false;
}
const auto & lhs_children = lhs_node->getChildren();
const auto & rhs_children = rhs_node->getChildren();
if (lhs_children.size() != rhs_children.size())
return false;
for (size_t i = 0; i < lhs_children.size(); ++i)
{
const auto & lhs_child = lhs_children[i];
const auto & rhs_child = rhs_children[i];
if (!lhs_child && !rhs_child)
continue;
else if (lhs_child && !rhs_child)
return false;
else if (!lhs_child && rhs_child)
return false;
nodes_to_process.emplace_back(lhs_child.get(), rhs_child.get());
}
}
return true;
}
bool nodeIsAggregateFunctionOrInGroupByKeys(const QueryTreeNodePtr & node) const
{
if (auto * function_node = node->as<FunctionNode>())
@ -145,8 +191,17 @@ private:
return true;
for (const auto & group_by_key_node : group_by_keys_nodes)
{
if (node->isEqual(*group_by_key_node, {.compare_aliases = false}))
return true;
{
/** Column sources should be compared with aliases for correct GROUP BY keys validation,
* otherwise t2.x and t1.x will be considered as the same column:
* SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x;
*/
if (areColumnSourcesEqual(node, group_by_key_node))
return true;
}
}
return false;
}

View File

@ -0,0 +1,6 @@
1
2
3
1 1
2 2
3 3

View File

@ -0,0 +1,21 @@
-- Test for GROUP BY key validation when the same source is joined to itself under different aliases.
-- Setup: a single table t1 with one Int32 column x holding the values 1, 2, 3.
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (x Int32) ENGINE = MergeTree ORDER BY x;
INSERT INTO t1 VALUES (1), (2), (3);
-- The queries below run with the analyzer enabled.
SET allow_experimental_analyzer = 1;
-- Negative cases: every query below is annotated to fail with NOT_AN_AGGREGATE.
-- The first group selects a column of t2 while grouping by the other join side,
-- over a plain table, numbers(), subqueries, and UNION ALL subqueries.
SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x; -- { serverError NOT_AN_AGGREGATE }
SELECT t2.number FROM numbers(10) as t1 JOIN numbers(10) as t2 ON t1.number = t2.number GROUP BY t1.number; -- { serverError NOT_AN_AGGREGATE }
SELECT t2.a FROM (SELECT x as a FROM t1) as t1 JOIN (SELECT x as a FROM t1) as t2 ON t1.a = t2.a GROUP BY t1.a; -- { serverError NOT_AN_AGGREGATE }
SELECT t2.a FROM (SELECT x as a FROM t1 UNION ALL SELECT x as a FROM t1) as t1 JOIN (SELECT x as a FROM t1 UNION ALL SELECT x as a FROM t1) as t2 ON t1.a = t2.a GROUP BY t1.a; -- { serverError NOT_AN_AGGREGATE }
-- Left side of the join has no alias; joined_subquery_requires_alias = 0 is set for this query only.
SELECT t2.number FROM numbers(10) JOIN numbers(10) as t2 ON number = t2.number GROUP BY number SETTINGS joined_subquery_requires_alias = 0; -- { serverError NOT_AN_AGGREGATE }
-- Left side is aliased as t0; it is referenced both by the table name (t1) and by the alias (t0).
SELECT t2.x FROM t1 as t0 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x; -- { serverError NOT_AN_AGGREGATE }
SELECT t2.x FROM t1 as t0 JOIN t1 as t2 ON t0.x = t2.x GROUP BY t0.x; -- { serverError NOT_AN_AGGREGATE }
-- Unqualified x on one side (GROUP BY or SELECT) and a qualified column of the other join side on the other.
SELECT t2.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY x; -- { serverError NOT_AN_AGGREGATE }
SELECT t1.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t2.x; -- { serverError NOT_AN_AGGREGATE }
SELECT x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t2.x; -- { serverError NOT_AN_AGGREGATE }
SELECT x FROM t1 JOIN t1 as t2 USING (x) GROUP BY t2.x; -- { serverError NOT_AN_AGGREGATE }
-- Positive cases: no error annotation, so these two queries are expected to succeed and produce output.
-- NOTE(review): presumably the unqualified x resolves to t1.x here, which is why they pass validation; confirm.
SELECT t1.x FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY x ORDER BY ALL;
SELECT x, sum(t2.x) FROM t1 JOIN t1 as t2 ON t1.x = t2.x GROUP BY t1.x ORDER BY ALL;