From 7ef96b0086e936ff2ce05d0c93cb860d6e513bb0 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 2 Oct 2024 17:01:24 +0200 Subject: [PATCH] Another WIP version --- src/Analyzer/Resolve/IdentifierLookup.h | 1 - .../Resolve/IdentifierResolveScope.cpp | 8 +- src/Analyzer/Resolve/IdentifierResolveScope.h | 5 +- src/Analyzer/Resolve/QueryAnalyzer.cpp | 95 ++++++++++++------- src/Analyzer/Resolve/ScopeAliases.h | 2 + ...alyzer_resolve_from_parent_scope.reference | 10 ++ ...033_analyzer_resolve_from_parent_scope.sql | 8 ++ 7 files changed, 91 insertions(+), 38 deletions(-) diff --git a/src/Analyzer/Resolve/IdentifierLookup.h b/src/Analyzer/Resolve/IdentifierLookup.h index f8518fe535f..742613b8dac 100644 --- a/src/Analyzer/Resolve/IdentifierLookup.h +++ b/src/Analyzer/Resolve/IdentifierLookup.h @@ -166,7 +166,6 @@ struct IdentifierResolveResult struct IdentifierResolveState { - IdentifierResolveResult resolve_result; size_t count = 1; }; diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.cpp b/src/Analyzer/Resolve/IdentifierResolveScope.cpp index 1e1e54f574e..88b95868126 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.cpp +++ b/src/Analyzer/Resolve/IdentifierResolveScope.cpp @@ -135,11 +135,11 @@ void IdentifierResolveScope::popExpressionNode() buffer << '\n'; } - buffer << "Identifier lookup to resolve state " << identifier_lookup_to_resolve_state.size() << '\n'; - for (const auto & [identifier, state] : identifier_lookup_to_resolve_state) + buffer << "Identifier lookup to resolve state " << identifier_in_lookup_process.size() << '\n'; + for (const auto & [identifier, state] : identifier_in_lookup_process) { - buffer << "Identifier " << identifier.dump() << " resolve result "; - state.resolve_result.dump(buffer); + buffer << "Identifier " << identifier.dump() << " count "; + buffer << state.count; buffer << '\n'; } diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.h b/src/Analyzer/Resolve/IdentifierResolveScope.h index 7d9c7d74295..063f3b8d828 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.h +++ b/src/Analyzer/Resolve/IdentifierResolveScope.h @@ -138,7 +138,10 @@ struct IdentifierResolveScope ContextPtr context; /// Identifier lookup to result - std::unordered_map identifier_lookup_to_resolve_state; + std::unordered_map identifier_in_lookup_process; + + /// Identifier lookup to result + std::unordered_map identifier_lookup_to_resolve_result; /// Argument can be expression like constant, column, function or table expression std::unordered_map expression_argument_name_to_node; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 73a6a9e7f77..be0fb0d6a49 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -1156,6 +1156,10 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierFromAliases(const Ide QueryTreeNodePtr alias_node = *it; + auto node_type = alias_node->getNodeType(); + if (node_type != QueryTreeNodeType::QUERY && node_type != QueryTreeNodeType::UNION) + alias_node = alias_node->clone(); + if (!alias_node) throw Exception(ErrorCodes::LOGICAL_ERROR, "Node with alias {} is not valid. In scope {}", @@ -1180,8 +1184,6 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierFromAliases(const Ide return {}; } - auto node_type = alias_node->getNodeType(); - auto * scope_to_resolve_alias_expression = &scope; if (identifier_resolve_context.scope_to_resolve_alias_expression) { @@ -1193,6 +1195,8 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierFromAliases(const Ide LOG_DEBUG(&Poco::Logger::get("resolveFromAliases"), "Didn't change scope_to_resolve_alias_expression"); } + scope_to_resolve_alias_expression->aliases.node_to_remove_aliases.push_back(alias_node); + LOG_DEBUG(&Poco::Logger::get("resolveFromAliases"), "Scope to resolve expressions:\n{}", scope_to_resolve_alias_expression->dump()); /// Resolve expression if necessary @@ -1400,11 +1404,11 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook IdentifierResolveScope & scope, IdentifierResolveContext identifier_resolve_settings) { - auto it = scope.identifier_lookup_to_resolve_state.find(identifier_lookup); + auto it = scope.identifier_in_lookup_process.find(identifier_lookup); LOG_DEBUG(&Poco::Logger::get("tryResolveIdentifier"), "Resolving '{}' in scope:\n{}\nHAVE ANOTHER SCOPE FOR EXPRESSIONS: {}", identifier_lookup.dump(), scope.dump(), identifier_resolve_settings.scope_to_resolve_alias_expression != nullptr); bool already_in_resolve_process = false; - if (it != scope.identifier_lookup_to_resolve_state.end()) + if (it != scope.identifier_in_lookup_process.end()) { LOG_DEBUG(&Poco::Logger::get("tryResolveIdentifier"), "Found {} in cache", identifier_lookup.dump()); it->second.count++; @@ -1435,7 +1439,16 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook } else { - auto [insert_it, _] = scope.identifier_lookup_to_resolve_state.insert({identifier_lookup, IdentifierResolveState()}); + if (!identifier_resolve_settings.scope_to_resolve_alias_expression) + { + auto jt = scope.identifier_lookup_to_resolve_result.find(identifier_lookup); + if (jt != scope.identifier_lookup_to_resolve_result.end() && + scope.use_identifier_lookup_to_result_cache && + !scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup)) + return jt->second; + } + + auto [insert_it, _] = scope.identifier_in_lookup_process.insert({identifier_lookup, IdentifierResolveState()}); it = insert_it; } @@ -1527,25 +1540,32 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook } } - /// Try to resolve table identifier from database catalog - if (!resolve_result.resolved_identifier && identifier_resolve_settings.allow_to_check_database_catalog && identifier_lookup.isTableExpressionLookup()) - { - resolve_result = IdentifierResolver::tryResolveTableIdentifierFromDatabaseCatalog(identifier_lookup.identifier, scope); - } - /// Try to resolve identifier from parent scopes if (!resolve_result.resolved_identifier) { resolve_result = tryResolveIdentifierInParentScopes(identifier_lookup, scope, identifier_resolve_settings); } + /// Try to resolve table identifier from database catalog + if (!resolve_result.resolved_identifier && identifier_resolve_settings.allow_to_check_database_catalog && identifier_lookup.isTableExpressionLookup()) + { + resolve_result = IdentifierResolver::tryResolveTableIdentifierFromDatabaseCatalog(identifier_lookup.identifier, scope); + } + it->second.count--; /** If identifier was not resolved, or during expression resolution identifier was explicitly added into non cached set, * or identifier caching was disabled in resolve scope we remove identifier lookup result from identifier lookup to result table. */ if (it->second.count == 0) - scope.identifier_lookup_to_resolve_state.erase(it); + { + scope.identifier_in_lookup_process.erase(it); + + if (!identifier_resolve_settings.scope_to_resolve_alias_expression && + !scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup) + && resolve_result.isResolved()) + scope.identifier_lookup_to_resolve_result[identifier_lookup] = resolve_result; + } return resolve_result; } @@ -3593,6 +3613,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( { checkStackSize(); + LOG_DEBUG(getLogger("resolveExpressionNode"), "Resolve node:\n{}", node->dumpTree()); + auto resolved_expression_it = resolved_expressions.find(node); if (resolved_expression_it != resolved_expressions.end()) { @@ -3639,28 +3661,28 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( * To support both (SELECT 1) AS expression in projection and (SELECT 1) as subquery in IN, do not use * alias table because in alias table subquery could be evaluated as scalar. */ - bool use_alias_table = !ignore_alias; - if (is_duplicated_alias || (allow_table_expression && IdentifierResolver::isSubqueryNodeType(node->getNodeType()))) - use_alias_table = false; + // bool use_alias_table = !ignore_alias; + // if (is_duplicated_alias || (allow_table_expression && IdentifierResolver::isSubqueryNodeType(node->getNodeType()))) + // use_alias_table = false; - if (!node_alias.empty() && use_alias_table) - { - /** Node could be potentially resolved by resolving other nodes. - * SELECT b, a as b FROM test_table; - * - * To resolve b we need to resolve a. - */ - auto it = scope.aliases.alias_name_to_expression_node.find(node_alias); - if (it != scope.aliases.alias_name_to_expression_node.end()) - node = it->second->clone(); + // if (!node_alias.empty() && use_alias_table) + // { + // /** Node could be potentially resolved by resolving other nodes. + // * SELECT b, a as b FROM test_table; + // * + // * To resolve b we need to resolve a. + // */ + // auto it = scope.aliases.alias_name_to_expression_node.find(node_alias); + // if (it != scope.aliases.alias_name_to_expression_node.end()) + // node = it->second->clone(); - if (allow_lambda_expression) - { - it = scope.aliases.alias_name_to_lambda_node.find(node_alias); - if (it != scope.aliases.alias_name_to_lambda_node.end()) - node = it->second->clone(); - } - } + // if (allow_lambda_expression) + // { + // it = scope.aliases.alias_name_to_lambda_node.find(node_alias); + // if (it != scope.aliases.alias_name_to_lambda_node.end()) + // node = it->second->clone(); + // } + // } scope.pushExpressionNode(node); @@ -3910,11 +3932,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( if (!in_aggregate_function_scope) { + LOG_DEBUG(getLogger("resolveExpressionNode"), "Looking for GROUP BY key:\n{}", node->dumpTree()); for (const auto * scope_ptr = &scope; scope_ptr; scope_ptr = scope_ptr->parent_scope) { auto it = scope_ptr->nullable_group_by_keys.find(node); if (it != scope_ptr->nullable_group_by_keys.end()) { + LOG_DEBUG(getLogger("resolveExpressionNode"), "Found GROUP BY key"); + node = it->node->clone(); node->convertToNullable(); break; @@ -4169,6 +4194,9 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR scope.nullable_group_by_keys.insert(group_by_elem); } } + + if (scope.group_by_use_nulls) + scope.identifier_lookup_to_resolve_result.clear(); } /** Resolve interpolate columns nodes list. @@ -5667,6 +5695,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier /// Remove aliases from expression and lambda nodes + for (auto & node : scope.aliases.node_to_remove_aliases) + node->removeAlias(); + for (auto & [_, node] : scope.aliases.alias_name_to_expression_node) node->removeAlias(); diff --git a/src/Analyzer/Resolve/ScopeAliases.h b/src/Analyzer/Resolve/ScopeAliases.h index cd9cc6339b4..f4333efa726 100644 --- a/src/Analyzer/Resolve/ScopeAliases.h +++ b/src/Analyzer/Resolve/ScopeAliases.h @@ -24,6 +24,8 @@ struct ScopeAliases std::unordered_set nodes_with_duplicated_aliases; std::vector cloned_nodes_with_duplicated_aliases; + QueryTreeNodes node_to_remove_aliases; + /// Names which are aliases from ARRAY JOIN. /// This is needed to properly qualify columns from matchers and avoid name collision. std::unordered_set array_join_aliases; diff --git a/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference index f599e28b8ab..18910989d03 100644 --- a/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference +++ b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference @@ -1 +1,11 @@ 10 +0 0 +0 1 +0 2 +0 3 +0 4 +5 5 +5 6 +5 7 +5 8 +5 9 diff --git a/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql index 22f103c9bd5..dc360708789 100644 --- a/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql +++ b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql @@ -25,3 +25,11 @@ SELECT v FROM ( ); WITH (SELECT v FROM vecs_Float32 limit 1) AS a SELECT count(dp) FROM (SELECT dotProduct(a, v) AS dp FROM vecs_Float32); + +WITH + t as (SELECT number + a as x FROM numbers(5)) +SELECT 0 as a, x FROM t +UNION ALL +SELECT 5 as a, x FROM t +ORDER BY a, x +SETTINGS allow_experimental_analyzer = 1;