From 861dcbbffbef382c542e6e9485ec21a8e5fa5ffd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jul 2024 16:04:00 +0000 Subject: [PATCH] Do not check parent scope for group_by_use_nulls outside of subquery. --- .../Resolve/IdentifierResolveScope.cpp | 3 +- src/Analyzer/Resolve/IdentifierResolveScope.h | 4 +- src/Analyzer/Resolve/QueryAnalyzer.cpp | 30 +++++++------ ...2535_analyzer_group_by_use_nulls.reference | 41 +++++++++++++++++ .../02535_analyzer_group_by_use_nulls.sql | 45 +++++++++++++++++++ 5 files changed, 108 insertions(+), 15 deletions(-) diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.cpp b/src/Analyzer/Resolve/IdentifierResolveScope.cpp index ae363b57047..32b3107ac16 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.cpp +++ b/src/Analyzer/Resolve/IdentifierResolveScope.cpp @@ -11,9 +11,10 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -IdentifierResolveScope::IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_) +IdentifierResolveScope::IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_, bool is_query_) : scope_node(std::move(scope_node_)) , parent_scope(parent_scope_) + , is_query(is_query_) { if (parent_scope) { diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.h b/src/Analyzer/Resolve/IdentifierResolveScope.h index ab2e27cc14d..917e032321d 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.h +++ b/src/Analyzer/Resolve/IdentifierResolveScope.h @@ -128,7 +128,7 @@ constexpr auto PROJECTION_NAME_PLACEHOLDER = "__projection_name_placeholder"; struct IdentifierResolveScope { /// Construct identifier resolve scope using scope node, and parent scope - IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_); + IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_, bool is_query_); QueryTreeNodePtr scope_node; @@ -188,6 +188,8 @@ struct IdentifierResolveScope /// Join retutns NULLs instead of default values bool join_use_nulls = false; + bool is_query; + /// JOINs count size_t joins_count = 0; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 576c4943ccb..63249ad2aeb 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -117,7 +117,7 @@ QueryAnalyzer::~QueryAnalyzer() = default; void QueryAnalyzer::resolve(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, ContextPtr context) { - IdentifierResolveScope scope(node, nullptr /*parent_scope*/); + IdentifierResolveScope scope(node, nullptr /*parent_scope*/, true /*is_query*/); if (!scope.context) scope.context = context; @@ -509,7 +509,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden /// exception with this settings enabled(https://github.com/ClickHouse/ClickHouse/issues/52494). subquery_context->setSetting("use_structure_from_insertion_table_in_table_functions", false); - auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_subquery*/); + auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_query*/); options.only_analyze = only_analyze; auto interpreter = std::make_unique(node->toAST(), subquery_context, subquery_context->getViewSource(), options); @@ -2163,7 +2163,7 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I if (apply_transformer->getApplyTransformerType() == ApplyColumnTransformerType::LAMBDA) { auto lambda_expression_to_resolve = expression_node->clone(); - IdentifierResolveScope lambda_scope(expression_node, &scope /*parent_scope*/); + IdentifierResolveScope lambda_scope(expression_node, &scope /*parent_scope*/, false /*is_query*/); node_projection_names = resolveLambda(expression_node, lambda_expression_to_resolve, {node}, lambda_scope); auto & lambda_expression_to_resolve_typed = lambda_expression_to_resolve->as(); node = lambda_expression_to_resolve_typed.getExpression(); @@ -3036,7 +3036,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi auto lambda_expression_clone = lambda_expression_untyped->clone(); - IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/); + IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/, false /*is_query*/); ProjectionNames lambda_projection_names = resolveLambda(lambda_expression_untyped, lambda_expression_clone, function_arguments, lambda_scope); auto & resolved_lambda = lambda_expression_clone->as(); @@ -3291,7 +3291,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi lambda_arguments.push_back(std::make_shared(std::move(column_name_and_type), lambda_to_resolve)); } - IdentifierResolveScope lambda_scope(lambda_to_resolve, &scope /*parent_scope*/); + IdentifierResolveScope lambda_scope(lambda_to_resolve, &scope /*parent_scope*/, false /*is_query*/); lambda_projection_names = resolveLambda(lambda_argument, lambda_to_resolve, lambda_arguments, lambda_scope); if (auto * lambda_list_node_result = lambda_to_resolve_typed.getExpression()->as()) @@ -3512,7 +3512,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( auto node_type = node->getNodeType(); if (!allow_table_expression && (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION)) { - IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/, false /*is_query*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; evaluateScalarSubqueryIfNeeded(node, subquery_scope); @@ -3619,7 +3619,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( else union_node->setIsCTE(false); - IdentifierResolveScope subquery_scope(resolved_identifier_node, &scope /*parent_scope*/); + IdentifierResolveScope subquery_scope(resolved_identifier_node, &scope /*parent_scope*/, true /*is_query*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; /// CTE is being resolved, it's required to forbid to resolve to it again @@ -3752,7 +3752,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( [[fallthrough]]; case QueryTreeNodeType::UNION: { - IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/, true /*is_query*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; std::string projection_name = "_subquery_" + std::to_string(subquery_counter); @@ -3826,6 +3826,10 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( node->convertToNullable(); break; } + + /// Check parent scopes until find current query scope. + if (scope_ptr->is_query) + break; } } @@ -4112,7 +4116,7 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo bool is_column_constant = interpolate_node_typed.getExpression()->getNodeType() == QueryTreeNodeType::CONSTANT; auto & interpolation_to_resolve = interpolate_node_typed.getInterpolateExpression(); - IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/); + IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/, false /*is_query*/); auto fake_column_node = std::make_shared(NameAndTypePair(column_to_interpolate_name, interpolate_node_typed.getExpression()->getResultType()), interpolate_node_typed.getExpression()); if (is_column_constant) @@ -4410,7 +4414,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table */ alias_column_to_resolve = column_name_to_column_node[alias_column_to_resolve_name]; - IdentifierResolveScope alias_column_resolve_scope(alias_column_to_resolve, nullptr /*parent_scope*/); + IdentifierResolveScope alias_column_resolve_scope(alias_column_to_resolve, nullptr /*parent_scope*/, false /*is_query*/); alias_column_resolve_scope.column_name_to_column_node = std::move(column_name_to_column_node); alias_column_resolve_scope.context = scope.context; @@ -5003,7 +5007,7 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS left_subquery->getProjection().getNodes().push_back(projection_node->clone()); left_subquery->getJoinTree() = left_table_expression; - IdentifierResolveScope left_subquery_scope(left_subquery, nullptr /*parent_scope*/); + IdentifierResolveScope left_subquery_scope(left_subquery, nullptr /*parent_scope*/, true /*is_query*/); resolveQuery(left_subquery, left_subquery_scope); const auto & resolved_nodes = left_subquery->getProjection().getNodes(); @@ -5612,7 +5616,7 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier auto & non_recursive_query_mutable_context = non_recursive_query_is_query_node ? non_recursive_query->as().getMutableContext() : non_recursive_query->as().getMutableContext(); - IdentifierResolveScope non_recursive_subquery_scope(non_recursive_query, &scope /*parent_scope*/); + IdentifierResolveScope non_recursive_subquery_scope(non_recursive_query, &scope /*parent_scope*/, true /*is_query*/); non_recursive_subquery_scope.subquery_depth = scope.subquery_depth + 1; if (non_recursive_query_is_query_node) @@ -5643,7 +5647,7 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier { auto & query_node = queries_nodes[i]; - IdentifierResolveScope subquery_scope(query_node, &scope /*parent_scope*/); + IdentifierResolveScope subquery_scope(query_node, &scope /*parent_scope*/, true /*is_subquery*/); if (recursive_cte_table_node) subquery_scope.expression_argument_name_to_node[union_node_typed.getCTEName()] = recursive_cte_table_node; diff --git a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference index 63610604ddd..858fbe98838 100644 --- a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference +++ b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference @@ -264,3 +264,44 @@ SETTINGS group_by_use_nulls = 1, max_bytes_before_external_sort=10; 9 \N 9 \N 0 20 \N 1 25 +CREATE TABLE test +ENGINE = ReplacingMergeTree +PRIMARY KEY id +AS SELECT number AS id FROM numbers(100); +SELECT id +FROM test +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT id + FROM test +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; +SELECT id +FROM test +FINAL +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT DISTINCT id + FROM test + FINAL +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; +SELECT id +FROM test +FINAL +GROUP BY + GROUPING SETS ((id)) +ORDER BY + id IN ( + SELECT DISTINCT id + FROM test + FINAL + LIMIT 4 + ) ASC +LIMIT 256 BY id +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls=true; diff --git a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql index a4d4f2f8bc9..4ae5df9629a 100644 --- a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql +++ b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql @@ -83,3 +83,48 @@ GROUP BY ) ORDER BY 1, tuple(val) SETTINGS group_by_use_nulls = 1, max_bytes_before_external_sort=10; + +CREATE TABLE test +ENGINE = ReplacingMergeTree +PRIMARY KEY id +AS SELECT number AS id FROM numbers(100); + +SELECT id +FROM test +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT id + FROM test +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; + +SELECT id +FROM test +FINAL +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT DISTINCT id + FROM test + FINAL +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; + +SELECT id +FROM test +FINAL +GROUP BY + GROUPING SETS ((id)) +ORDER BY + id IN ( + SELECT DISTINCT id + FROM test + FINAL + LIMIT 4 + ) ASC +LIMIT 256 BY id +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls=true;