Fix join_use_nulls in analyzer

This commit is contained in:
vdimir 2023-05-01 15:09:26 +00:00
parent bb2acb50d2
commit 40d658e467
No known key found for this signature in database
GPG Key ID: 6EE4CE2BEDC51862
3 changed files with 612 additions and 18 deletions

View File

@ -1205,6 +1205,29 @@ private:
static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context);
/** Determine which side of `join_node` a resolved column belongs to.
  *
  * Returns JoinTableSide::Left or JoinTableSide::Right when the column source of
  * `resolved_identifier` is the very same node (pointer identity) as the left or right
  * table expression of `join_node`.
  *
  * Returns an empty optional when:
  *  - `resolved_identifier` is not a ColumnNode;
  *  - the column source is neither direct table expression of `join_node`
  *    (only the two direct children are compared, nothing deeper is searched).
  */
static std::optional<JoinTableSide> getColumnSideFromJoinTree(QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node)
{
    /// Use the pointer form of the cast: the reference form `as<ColumnNode &>()` fails on
    /// a node of another type, while the nullability conversion that consumes this result
    /// already tolerates non-column nodes. For such nodes there is simply no side to report.
    auto * column_node = resolved_identifier->as<ColumnNode>();
    if (!column_node)
        return {};

    /// The raw pointer is used for identity comparison only and is never dereferenced.
    const auto * column_src = column_node->getColumnSource().get();

    if (join_node.getLeftTableExpression().get() == column_src)
        return JoinTableSide::Left;

    if (join_node.getRightTableExpression().get() == column_src)
        return JoinTableSide::Right;

    return {};
}
/** Wrap the type of a column resolved through a JOIN into Nullable (or LowCardinality(Nullable))
  * when the JOIN kind is one of:
  *  - FULL JOIN: regardless of `resolved_side`, even when it is empty;
  *  - LEFT JOIN: only when `resolved_side` is the right side;
  *  - RIGHT JOIN: only when `resolved_side` is the left side.
  *
  * Nodes that are not columns are left untouched. The node is modified in place, so callers
  * that must not affect other holders of the node are expected to pass a copy.
  * No setting is checked here; deciding whether the conversion applies at all is up to the caller.
  */
static void convertJoinedColumnTypeToNullIfNeeded(QueryTreeNodePtr & resolved_identifier, const JoinKind & join_kind, std::optional<JoinTableSide> resolved_side)
{
    /// Only column nodes are handled.
    if (resolved_identifier->getNodeType() != QueryTreeNodeType::COLUMN)
        return;

    /// An unknown side never matches.
    const auto side_is = [&resolved_side](JoinTableSide side)
    {
        return resolved_side && *resolved_side == side;
    };

    const bool needs_nullable = isFull(join_kind)
        || (isLeft(join_kind) && side_is(JoinTableSide::Right))
        || (isRight(join_kind) && side_is(JoinTableSide::Left));

    if (!needs_nullable)
        return;

    auto & column_node = resolved_identifier->as<ColumnNode &>();
    auto nullable_type = makeNullableOrLowCardinalityNullable(column_node.getColumnType());
    column_node.setColumnType(nullable_type);
}
/// Resolve identifier functions
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
@ -2982,6 +3005,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
QueryTreeNodePtr resolved_identifier;
JoinKind join_kind = from_join_node.getKind();
bool join_use_nulls = scope.context->getSettingsRef().join_use_nulls;
if (left_resolved_identifier && right_resolved_identifier)
{
@ -3027,19 +3051,31 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
*
* Otherwise we prefer column from left table.
*/
if (identifier_path_part == right_column_source_alias)
return right_resolved_identifier;
else if (!left_column_source_alias.empty() &&
right_column_source_alias.empty() &&
identifier_path_part != left_column_source_alias)
return right_resolved_identifier;
bool column_resolved_using_right_alias = identifier_path_part == right_column_source_alias;
bool column_resolved_without_using_left_alias = !left_column_source_alias.empty()
&& right_column_source_alias.empty()
&& identifier_path_part != left_column_source_alias;
if (column_resolved_using_right_alias || column_resolved_without_using_left_alias)
{
resolved_side = JoinTableSide::Right;
resolved_identifier = right_resolved_identifier;
}
else
{
resolved_side = JoinTableSide::Left;
resolved_identifier = left_resolved_identifier;
}
}
else
{
resolved_side = JoinTableSide::Left;
resolved_identifier = left_resolved_identifier;
}
return left_resolved_identifier;
}
else if (scope.joins_count == 1 && scope.context->getSettingsRef().single_join_prefer_left_table)
{
return left_resolved_identifier;
resolved_side = JoinTableSide::Left;
resolved_identifier = left_resolved_identifier;
}
else
{
@ -3092,17 +3128,10 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
if (join_node_in_resolve_process || !resolved_identifier)
return resolved_identifier;
bool join_use_nulls = scope.context->getSettingsRef().join_use_nulls;
if (join_use_nulls &&
resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN &&
(isFull(join_kind) ||
(isLeft(join_kind) && resolved_side && *resolved_side == JoinTableSide::Right) ||
(isRight(join_kind) && resolved_side && *resolved_side == JoinTableSide::Left)))
if (join_use_nulls)
{
resolved_identifier = resolved_identifier->clone();
auto & resolved_column = resolved_identifier->as<ColumnNode &>();
resolved_column.setColumnType(makeNullableOrLowCardinalityNullable(resolved_column.getColumnType()));
convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side);
}
return resolved_identifier;
@ -4001,6 +4030,22 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
else
matched_expression_nodes_with_names = resolveUnqualifiedMatcher(matcher_node, scope);
if (scope.context->getSettingsRef().join_use_nulls)
{
const auto * nearest_query_scope = scope.getNearestQueryScope();
const QueryNode * nearest_scope_query_node = nearest_query_scope ? nearest_query_scope->scope_node->as<QueryNode>() : nullptr;
const QueryTreeNodePtr & nearest_scope_join_tree = nearest_scope_query_node ? nearest_scope_query_node->getJoinTree() : nullptr;
const JoinNode * nearest_scope_join_node = nearest_scope_join_tree ? nearest_scope_join_tree->as<JoinNode>() : nullptr;
if (nearest_scope_join_node)
{
for (auto & [node, node_name] : matched_expression_nodes_with_names)
{
auto join_identifier_side = getColumnSideFromJoinTree(node, *nearest_scope_join_node);
convertJoinedColumnTypeToNullIfNeeded(node, nearest_scope_join_node->getKind(), join_identifier_side);
}
}
}
std::unordered_map<const IColumnTransformerNode *, std::unordered_set<std::string>> strict_transformer_to_used_column_names;
for (const auto & transformer : matcher_node_typed.getColumnTransformers().getNodes())
{

View File

@ -0,0 +1,430 @@
-- { echoOn }
SELECT '============ LEFT JOIN ============' FORMAT Null;
SELECT a, toTypeName(a)
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
1 Int32
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
1 Int32
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
1 \N Int32 Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
1 Int32
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
\N Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM t1
LEFT JOIN t2
ON t1.a = t2.a
) ORDER BY 1;
\N \N Nullable(UInt32) Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
1 Int64
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
1 Int64
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
1 Int32
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
\N Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM t1
LEFT JOIN t2
USING (a)
) ORDER BY 1;
1 \N Int64 Nullable(UInt32)
SELECT a, toTypeName(a)
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
1 Int32
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
1 Int32
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
1 \N Int32 Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
1 Int32
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
LEFT JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
\N Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM t1
LEFT JOIN t2
ON t1.a = t2.key
) ORDER BY 1;
\N \N Nullable(UInt32) Nullable(UInt32)
SELECT '============ RIGHT JOIN ============' FORMAT Null;
SELECT a, toTypeName(a)
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
\N 2 Nullable(Int32) UInt32
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
2 UInt32
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM t1
RIGHT JOIN t2
ON t1.a = t2.a
) ORDER BY 1;
2 2 UInt32 UInt32
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
2 Int64
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
2 Int64
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
2 Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
2 UInt32
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM t1
RIGHT JOIN t2
USING (a)
) ORDER BY 1;
2 2 Int64 UInt32
SELECT a, toTypeName(a)
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
\N 2 Nullable(Int32) UInt32
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
RIGHT JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
2 UInt32
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM t1
RIGHT JOIN t2
ON t1.a = t2.key
) ORDER BY 1;
2 2 UInt32 UInt32
SELECT '============ FULL JOIN ============' FORMAT Null;
SELECT a, toTypeName(a)
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
1 Nullable(Int32)
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
1 Nullable(Int32)
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
1 \N Nullable(Int32) Nullable(UInt32)
\N 2 Nullable(Int32) Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
1 Nullable(Int32)
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as a) t2
ON t1.a = t2.a
) ORDER BY 1;
2 Nullable(UInt32)
\N Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM t1
FULL JOIN t2
ON t1.a = t2.a
) ORDER BY 1;
2 2 Nullable(UInt32) Nullable(UInt32)
\N \N Nullable(UInt32) Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
1 Nullable(Int64)
2 Nullable(Int64)
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
1 Nullable(Int64)
2 Nullable(Int64)
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
1 Nullable(Int32)
2 Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as a) t2
USING (a)
) ORDER BY 1;
2 Nullable(UInt32)
\N Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM t1
FULL JOIN t2
USING (a)
) ORDER BY 1;
1 \N Nullable(Int64) Nullable(UInt32)
2 2 Nullable(Int64) Nullable(UInt32)
SELECT a, toTypeName(a)
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
1 Nullable(Int32)
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
1 Nullable(Int32)
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
1 \N Nullable(Int32) Nullable(UInt32)
\N 2 Nullable(Int32) Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
1 Nullable(Int32)
\N Nullable(Int32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
FULL JOIN (SELECT 2 :: UInt32 as key) t2
ON t1.a = t2.key
) ORDER BY 1;
2 Nullable(UInt32)
\N Nullable(UInt32)
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM t1
FULL JOIN t2
ON t1.a = t2.key
) ORDER BY 1;
2 2 Nullable(UInt32) Nullable(UInt32)
\N \N Nullable(UInt32) Nullable(UInt32)

View File

@ -0,0 +1,119 @@
-- Fixture tables referenced by name (`FROM t1 ... JOIN t2`) in the templated queries below.
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
-- `a` exists in both tables with different types (Int32 vs UInt32); t2 additionally has `key`.
CREATE TABLE t1 (a Int32) ENGINE = TinyLog;
CREATE TABLE t2 (a UInt32, key UInt32) ENGINE = TinyLog;
-- One row per table with different values, so neither `t1.a = t2.a` nor `t1.a = t2.key` matches.
INSERT INTO t1 VALUES (1);
INSERT INTO t2 VALUES (2, 2);
-- Settings applied to every query that follows in this file.
SET join_use_nulls = 1;
SET allow_experimental_analyzer = 1;
-- { echoOn }
{% for KIND in ('LEFT', 'RIGHT', 'FULL') -%}
SELECT '============ {{ KIND }} JOIN ============' FORMAT Null;
{% for right_column_name in ['a', 'key'] -%}
SELECT a, toTypeName(a)
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
{{ KIND }} JOIN (SELECT 2 :: UInt32 as {{ right_column_name }}) t2
ON t1.a = t2.{{ right_column_name }}
) ORDER BY 1;
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
{{ KIND }} JOIN (SELECT 2 :: UInt32 as {{ right_column_name }}) t2
ON t1.a = t2.{{ right_column_name }}
) ORDER BY 1;
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
{{ KIND }} JOIN (SELECT 2 :: UInt32 as {{ right_column_name }}) t2
ON t1.a = t2.{{ right_column_name }}
) ORDER BY 1;
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
{{ KIND }} JOIN (SELECT 2 :: UInt32 as {{ right_column_name }}) t2
ON t1.a = t2.{{ right_column_name }}
) ORDER BY 1;
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
{{ KIND }} JOIN (SELECT 2 :: UInt32 as {{ right_column_name }}) t2
ON t1.a = t2.{{ right_column_name }}
) ORDER BY 1;
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM t1
{{ KIND }} JOIN t2
ON t1.a = t2.{{ right_column_name }}
) ORDER BY 1;
{% if right_column_name == 'a' -%}
SELECT *, * APPLY toTypeName
FROM (
SELECT a
FROM (SELECT 1 :: Int32 as a) t1
{{ KIND }} JOIN (SELECT 2 :: UInt32 as {{ right_column_name }}) t2
USING (a)
) ORDER BY 1;
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM (SELECT 1 :: Int32 as a) t1
{{ KIND }} JOIN (SELECT 2 :: UInt32 as {{ right_column_name }}) t2
USING (a)
) ORDER BY 1;
SELECT *, * APPLY toTypeName
FROM (
SELECT t1.*
FROM (SELECT 1 :: Int32 as a) t1
{{ KIND }} JOIN (SELECT 2 :: UInt32 as {{ right_column_name }}) t2
USING (a)
) ORDER BY 1;
SELECT *, * APPLY toTypeName
FROM (
SELECT t2.*
FROM (SELECT 1 :: Int32 as a) t1
{{ KIND }} JOIN (SELECT 2 :: UInt32 as {{ right_column_name }}) t2
USING (a)
) ORDER BY 1;
SELECT *, * APPLY toTypeName
FROM (
SELECT *
FROM t1
{{ KIND }} JOIN t2
USING (a)
) ORDER BY 1;
{% endif -%}
{% endfor -%}
{% endfor -%}
-- { echoOff }
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;