Update after refactoring ALIAS columns computation

This commit is contained in:
vdimir 2024-03-20 17:05:34 +00:00
parent 2753042387
commit 42d59a8fd1
No known key found for this signature in database
GPG Key ID: 6EE4CE2BEDC51862
5 changed files with 135 additions and 29 deletions

View File

@ -1392,6 +1392,8 @@ private:
const NamesAndTypes & matched_columns,
const IdentifierResolveScope & scope);
/// Columns resolved from a matcher can also match columns from JOIN USING;
/// for such columns the type is updated to the type of the column in the USING section.
void updateMatchedColumnsFromJoinUsing(QueryTreeNodesWithNames & result_matched_column_nodes_with_names, const QueryTreeNodePtr & source_table_expression, IdentifierResolveScope & scope);
/// Resolves a qualified matcher and returns the matched column nodes together with their names.
QueryTreeNodesWithNames resolveQualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope);
/// NOTE(review): presumably the unqualified counterpart of resolveQualifiedMatcher;
/// the definition is not visible here - confirm before relying on this description.
QueryTreeNodesWithNames resolveUnqualifiedMatcher(QueryTreeNodePtr & matcher_node, IdentifierResolveScope & scope);
@ -3543,9 +3545,14 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
if (scope.join_use_nulls)
{
auto it = node_to_projection_name.find(resolved_identifier);
auto nullable_resolved_identifier = convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side, scope);
if (nullable_resolved_identifier)
{
resolved_identifier = nullable_resolved_identifier;
if (it != node_to_projection_name.end())
node_to_projection_name.emplace(resolved_identifier, it->second);
}
}
return resolved_identifier;
@ -4204,6 +4211,93 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::getMatchedColumnNodesWithN
return matched_column_nodes_with_names;
}
bool hasTableExpressionInJoinTree(const QueryTreeNodePtr & join_tree_node, const QueryTreeNodePtr & table_expression)
{
QueryTreeNodes nodes_to_process;
nodes_to_process.push_back(join_tree_node);
while (!nodes_to_process.empty())
{
auto node_to_process = std::move(nodes_to_process.back());
nodes_to_process.pop_back();
if (node_to_process == table_expression)
return true;
if (node_to_process->getNodeType() == QueryTreeNodeType::JOIN)
{
const auto & join_node = node_to_process->as<JoinNode &>();
nodes_to_process.push_back(join_node.getLeftTableExpression());
nodes_to_process.push_back(join_node.getRightTableExpression());
}
}
return false;
}
/// Columns resolved from a matcher can also match columns from the JOIN USING section.
/// Each such column is replaced with a clone of the corresponding inner USING column
/// (index 0 if `source_table_expression` is found in the left side of the JOIN, index 1 if in the right side),
/// and the type of the clone is set to the result type of the column in the USING section.
/// If a projection name is registered for the original node, it is registered for the replacement as well.
///
/// Only the JOIN at the root of the nearest query scope's join tree is considered.
///
/// TODO: This is not completely correct for qualified matchers: t1.* should be resolved to the left table column type.
/// But in the planner we do not distinguish such cases.
///
/// Throws UNSUPPORTED_METHOD if there is no enclosing query scope with a join tree,
/// and LOGICAL_ERROR if a column matches a USING column but `source_table_expression`
/// is found in neither side of the JOIN.
void QueryAnalyzer::updateMatchedColumnsFromJoinUsing(
    QueryTreeNodesWithNames & result_matched_column_nodes_with_names,
    const QueryTreeNodePtr & source_table_expression,
    IdentifierResolveScope & scope)
{
    auto * nearest_query_scope = scope.getNearestQueryScope();
    auto * nearest_query_scope_query_node = nearest_query_scope ? nearest_query_scope->scope_node->as<QueryNode>() : nullptr;

    /// If there is no parent query scope or the query scope does not have a join tree
    if (!nearest_query_scope_query_node || !nearest_query_scope_query_node->getJoinTree())
    {
        throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
            "There are no table sources. In scope {}",
            scope.scope_node->formatASTForErrorMessage());
    }

    const auto & join_tree = nearest_query_scope_query_node->getJoinTree();
    const auto * join_node = join_tree->as<JoinNode>();

    /// Nothing to update unless the join tree root is a JOIN ... USING.
    if (!join_node || !join_node->isUsingJoinExpression())
        return;

    const auto & join_using_list = join_node->getJoinExpression()->as<ListNode &>();
    const auto & join_using_nodes = join_using_list.getNodes();

    /// The side of the JOIN that contains the source table expression does not depend on the column,
    /// so it is determined once instead of walking the join tree for every matched column.
    const bool source_is_in_left_side
        = hasTableExpressionInJoinTree(join_node->getLeftTableExpression(), source_table_expression);
    const bool source_is_in_right_side = !source_is_in_left_side
        && hasTableExpressionInJoinTree(join_node->getRightTableExpression(), source_table_expression);

    for (auto & [matched_column_node, _] : result_matched_column_nodes_with_names)
    {
        /// Keep a copy of the name, not a reference: `matched_column_node` is reassigned below while the
        /// loop over USING columns continues, and a reference into the replaced node could dangle
        /// if that node had no other owner.
        const auto matched_column_name = matched_column_node->as<ColumnNode &>().getColumnName();

        for (const auto & join_using_node : join_using_nodes)
        {
            auto & join_using_column_node = join_using_node->as<ColumnNode &>();
            const auto & join_using_column_name = join_using_column_node.getColumnName();

            if (matched_column_name != join_using_column_name)
                continue;

            /// The expression of a USING column is a list of the inner columns: [0] is used for the left side, [1] for the right side.
            const auto & join_using_column_nodes_list = join_using_column_node.getExpressionOrThrow()->as<ListNode &>();
            const auto & join_using_column_nodes = join_using_column_nodes_list.getNodes();

            /// Look up the projection name of the original node before it is replaced.
            auto it = node_to_projection_name.find(matched_column_node);

            if (source_is_in_left_side)
                matched_column_node = join_using_column_nodes.at(0);
            else if (source_is_in_right_side)
                matched_column_node = join_using_column_nodes.at(1);
            else
                throw Exception(ErrorCodes::LOGICAL_ERROR,
                    "Cannot find column {} in JOIN USING section {}",
                    matched_column_node->dumpTree(), join_node->dumpTree());

            /// Clone so that changing the type below does not affect the node stored in the USING section.
            matched_column_node = matched_column_node->clone();
            if (it != node_to_projection_name.end())
                node_to_projection_name.emplace(matched_column_node, it->second);

            matched_column_node->as<ColumnNode &>().setColumnType(join_using_column_node.getResultType());
        }
    }
}
/** Resolve qualified tree matcher.
*
* First try to match qualified identifier to expression. If qualified identifier matched expression node then
@ -4321,6 +4415,8 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveQualifiedMatcher(Qu
matched_columns,
scope);
updateMatchedColumnsFromJoinUsing(result_matched_column_nodes_with_names, table_expression_node, scope);
return result_matched_column_nodes_with_names;
}

View File

@ -31,9 +31,9 @@
┌─database─┬─name─┐
│ system │ one │
└──────────┴──────┘
┌─database─┬─t.name─┐
│ system │ one
└──────────┴────────
┌─database─┬─name─┐
│ system │ one │
└──────────┴──────┘
2
2
2

View File

@ -56,20 +56,20 @@ Header: id UInt64
rhs.id UInt64
rhs.value_1 String
Actions: INPUT : 0 -> __table1.id UInt64 : 0
INPUT :: 1 -> __table1.value_2 UInt64 : 1
INPUT : 2 -> __table1.value_1 String : 2
INPUT : 1 -> __table1.value_1 String : 1
INPUT :: 2 -> __table1.value_2 UInt64 : 2
INPUT : 3 -> __table2.value_1 String : 3
INPUT :: 4 -> __table2.value_2 UInt64 : 4
INPUT : 5 -> __table2.id UInt64 : 5
ALIAS __table1.id :: 0 -> id UInt64 : 6
ALIAS __table1.value_1 :: 2 -> value_1 String : 0
ALIAS __table2.value_1 :: 3 -> rhs.value_1 String : 2
ALIAS __table1.value_1 :: 1 -> value_1 String : 0
ALIAS __table2.value_1 :: 3 -> rhs.value_1 String : 1
ALIAS __table2.id :: 5 -> rhs.id UInt64 : 3
Positions: 6 0 3 2
Positions: 6 0 3 1
Join (JOIN FillRightFirst)
Header: __table1.id UInt64
__table1.value_2 UInt64
__table1.value_1 String
__table1.value_2 UInt64
__table2.value_1 String
__table2.value_2 UInt64
__table2.id UInt64
@ -80,37 +80,37 @@ Positions: 6 0 3 2
Clauses: [(__table1.id, __table1.value_2) = (__table2.id, __table2.value_2)]
Expression ((JOIN actions + Change column names to column identifiers))
Header: __table1.id UInt64
__table1.value_2 UInt64
__table1.value_1 String
__table1.value_2 UInt64
Actions: INPUT : 0 -> id UInt64 : 0
INPUT : 1 -> value_2 UInt64 : 1
INPUT : 2 -> value_1 String : 2
INPUT : 1 -> value_1 String : 1
INPUT : 2 -> value_2 UInt64 : 2
ALIAS id :: 0 -> __table1.id UInt64 : 3
ALIAS value_2 :: 1 -> __table1.value_2 UInt64 : 0
ALIAS value_1 :: 2 -> __table1.value_1 String : 1
ALIAS value_1 :: 1 -> __table1.value_1 String : 0
ALIAS value_2 :: 2 -> __table1.value_2 UInt64 : 1
Positions: 3 0 1
ReadFromMergeTree (default.test_table_1)
Header: id UInt64
value_2 UInt64
value_1 String
value_2 UInt64
ReadType: Default
Parts: 1
Granules: 1
Expression ((JOIN actions + Change column names to column identifiers))
Header: __table2.id UInt64
__table2.value_2 UInt64
__table2.value_1 String
__table2.value_2 UInt64
Actions: INPUT : 0 -> id UInt64 : 0
INPUT : 1 -> value_2 UInt64 : 1
INPUT : 2 -> value_1 String : 2
INPUT : 1 -> value_1 String : 1
INPUT : 2 -> value_2 UInt64 : 2
ALIAS id :: 0 -> __table2.id UInt64 : 3
ALIAS value_2 :: 1 -> __table2.value_2 UInt64 : 0
ALIAS value_1 :: 2 -> __table2.value_1 String : 1
ALIAS value_1 :: 1 -> __table2.value_1 String : 0
ALIAS value_2 :: 2 -> __table2.value_2 UInt64 : 1
Positions: 3 0 1
ReadFromMergeTree (default.test_table_2)
Header: id UInt64
value_2 UInt64
value_1 String
value_2 UInt64
ReadType: Default
Parts: 1
Granules: 1

View File

@ -1,3 +1,5 @@
{% for column_expression_type in ['ALIAS', 'MATERIALIZED'] -%}
{{ column_expression_type }}
1
1
369 124 123 b
@ -6,7 +8,7 @@
3693 1231 a 1231
3693 1232 1231 1231 a
a
-- {echoOn }
-- { echoOn }
-- USING alias column contains default in old analyzer (but both queries below should have the same result)
SELECT y * 2, s || 'a' FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1;
738 ba
@ -41,9 +43,9 @@ SELECT (1, t1.*, t2.*) FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2
(1,'a',3693,0)
(1,'b',369,369)
SELECT t1.z, t2.z, t3.z FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1;
1 0 43
1 48 1
124 124 1
0 0 43
0 48 0
124 124 0
1232 0 1232
SELECT * FROM t1 FULL JOIN t2 USING (y) FULL JOIN t3 USING (y) ORDER BY 1,2,3 SETTINGS allow_experimental_analyzer = 1;
126 0 0 42
@ -79,3 +81,4 @@ b
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
{% endfor -%}

View File

@ -2,10 +2,16 @@ DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
CREATE TABLE t1 (x Int16, y Int64 ALIAS x + x * 2, z ALIAS x + 1, s String) ENGINE=MergeTree() ORDER BY x;
{% for column_expression_type in ['ALIAS', 'MATERIALIZED'] %}
-- { echoOff }
SELECT '{{ column_expression_type }}';
CREATE TABLE t1 (x Int16, y Int64 {{ column_expression_type }} x + x * 2, z {{ column_expression_type }} x + 1, s String) ENGINE=MergeTree() ORDER BY x;
CREATE TABLE t2 (y Int128, z Int16) ENGINE=MergeTree() ORDER BY y;
CREATE TABLE t3 (x Int16, y Int64 ALIAS x + x * 2, z ALIAS x + 1) ENGINE=MergeTree() ORDER BY x;
CREATE TABLE t3 (x Int16, y Int64 {{ column_expression_type }} x + x * 2, z {{ column_expression_type }} x + 1) ENGINE=MergeTree() ORDER BY x;
INSERT INTO t1 VALUES (1231, 'a'), (123, 'b');
@ -26,7 +32,7 @@ SELECT * FROM t1 INNER JOIN t3 USING (y) SETTINGS allow_experimental_analyzer =
SELECT * FROM t3 INNER JOIN t1 USING (y, z) SETTINGS allow_experimental_analyzer = 1;
SELECT s FROM t1 INNER JOIN t3 USING (y);
-- {echoOn }
-- { echoOn }
-- USING alias column contains default in old analyzer (but both queries below should have the same result)
SELECT y * 2, s || 'a' FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL SETTINGS allow_experimental_analyzer = 1;
SELECT y * 2, s || 'a' FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
@ -51,7 +57,8 @@ SELECT y FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y)
SELECT s FROM t1 FULL JOIN t2 USING (y) ORDER BY ALL;
SELECT s FROM (SELECT s, y FROM t1) t1 FULL JOIN (SELECT y FROM t2) t2 USING (y) ORDER BY ALL;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
{% endfor %}