Another WIP version

This commit is contained in:
Dmitry Novik 2024-10-02 17:01:24 +02:00
parent caad5369d4
commit 7ef96b0086
7 changed files with 91 additions and 38 deletions

View File

@ -166,7 +166,6 @@ struct IdentifierResolveResult
struct IdentifierResolveState
{
IdentifierResolveResult resolve_result;
size_t count = 1;
};

View File

@ -135,11 +135,11 @@ void IdentifierResolveScope::popExpressionNode()
buffer << '\n';
}
buffer << "Identifier lookup to resolve state " << identifier_lookup_to_resolve_state.size() << '\n';
for (const auto & [identifier, state] : identifier_lookup_to_resolve_state)
buffer << "Identifier lookup to resolve state " << identifier_in_lookup_process.size() << '\n';
for (const auto & [identifier, state] : identifier_in_lookup_process)
{
buffer << "Identifier " << identifier.dump() << " resolve result ";
state.resolve_result.dump(buffer);
buffer << "Identifier " << identifier.dump() << " count ";
buffer << state.count;
buffer << '\n';
}

View File

@ -138,7 +138,10 @@ struct IdentifierResolveScope
ContextPtr context;
/// Identifier lookup to result
std::unordered_map<IdentifierLookup, IdentifierResolveState, IdentifierLookupHash> identifier_lookup_to_resolve_state;
std::unordered_map<IdentifierLookup, IdentifierResolveState, IdentifierLookupHash> identifier_in_lookup_process;
/// Identifier lookup to result
std::unordered_map<IdentifierLookup, IdentifierResolveResult, IdentifierLookupHash> identifier_lookup_to_resolve_result;
/// Argument can be expression like constant, column, function or table expression
std::unordered_map<std::string, QueryTreeNodePtr> expression_argument_name_to_node;

View File

@ -1156,6 +1156,10 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierFromAliases(const Ide
QueryTreeNodePtr alias_node = *it;
auto node_type = alias_node->getNodeType();
if (node_type != QueryTreeNodeType::QUERY && node_type != QueryTreeNodeType::UNION)
alias_node = alias_node->clone();
if (!alias_node)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Node with alias {} is not valid. In scope {}",
@ -1180,8 +1184,6 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierFromAliases(const Ide
return {};
}
auto node_type = alias_node->getNodeType();
auto * scope_to_resolve_alias_expression = &scope;
if (identifier_resolve_context.scope_to_resolve_alias_expression)
{
@ -1193,6 +1195,8 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierFromAliases(const Ide
LOG_DEBUG(&Poco::Logger::get("resolveFromAliases"), "Didn't change scope_to_resolve_alias_expression");
}
scope_to_resolve_alias_expression->aliases.node_to_remove_aliases.push_back(alias_node);
LOG_DEBUG(&Poco::Logger::get("resolveFromAliases"), "Scope to resolve expressions:\n{}", scope_to_resolve_alias_expression->dump());
/// Resolve expression if necessary
@ -1400,11 +1404,11 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
IdentifierResolveScope & scope,
IdentifierResolveContext identifier_resolve_settings)
{
auto it = scope.identifier_lookup_to_resolve_state.find(identifier_lookup);
auto it = scope.identifier_in_lookup_process.find(identifier_lookup);
LOG_DEBUG(&Poco::Logger::get("tryResolveIdentifier"), "Resolving '{}' in scope:\n{}\nHAVE ANOTHER SCOPE FOR EXPRESSIONS: {}", identifier_lookup.dump(), scope.dump(), identifier_resolve_settings.scope_to_resolve_alias_expression != nullptr);
bool already_in_resolve_process = false;
if (it != scope.identifier_lookup_to_resolve_state.end())
if (it != scope.identifier_in_lookup_process.end())
{
LOG_DEBUG(&Poco::Logger::get("tryResolveIdentifier"), "Found {} in cache", identifier_lookup.dump());
it->second.count++;
@ -1435,7 +1439,16 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
}
else
{
auto [insert_it, _] = scope.identifier_lookup_to_resolve_state.insert({identifier_lookup, IdentifierResolveState()});
if (!identifier_resolve_settings.scope_to_resolve_alias_expression)
{
auto jt = scope.identifier_lookup_to_resolve_result.find(identifier_lookup);
if (jt != scope.identifier_lookup_to_resolve_result.end() &&
scope.use_identifier_lookup_to_result_cache &&
!scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup))
return jt->second;
}
auto [insert_it, _] = scope.identifier_in_lookup_process.insert({identifier_lookup, IdentifierResolveState()});
it = insert_it;
}
@ -1527,25 +1540,32 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook
}
}
/// Try to resolve table identifier from database catalog
if (!resolve_result.resolved_identifier && identifier_resolve_settings.allow_to_check_database_catalog && identifier_lookup.isTableExpressionLookup())
{
resolve_result = IdentifierResolver::tryResolveTableIdentifierFromDatabaseCatalog(identifier_lookup.identifier, scope);
}
/// Try to resolve identifier from parent scopes
if (!resolve_result.resolved_identifier)
{
resolve_result = tryResolveIdentifierInParentScopes(identifier_lookup, scope, identifier_resolve_settings);
}
/// Try to resolve table identifier from database catalog
if (!resolve_result.resolved_identifier && identifier_resolve_settings.allow_to_check_database_catalog && identifier_lookup.isTableExpressionLookup())
{
resolve_result = IdentifierResolver::tryResolveTableIdentifierFromDatabaseCatalog(identifier_lookup.identifier, scope);
}
it->second.count--;
/** If identifier was not resolved, or during expression resolution identifier was explicitly added into non cached set,
* or identifier caching was disabled in resolve scope we remove identifier lookup result from identifier lookup to result table.
*/
if (it->second.count == 0)
scope.identifier_lookup_to_resolve_state.erase(it);
{
scope.identifier_in_lookup_process.erase(it);
if (!identifier_resolve_settings.scope_to_resolve_alias_expression &&
!scope.non_cached_identifier_lookups_during_expression_resolve.contains(identifier_lookup)
&& resolve_result.isResolved())
scope.identifier_lookup_to_resolve_result[identifier_lookup] = resolve_result;
}
return resolve_result;
}
@ -3593,6 +3613,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(
{
checkStackSize();
LOG_DEBUG(getLogger("resolveExpressionNode"), "Resolve node:\n{}", node->dumpTree());
auto resolved_expression_it = resolved_expressions.find(node);
if (resolved_expression_it != resolved_expressions.end())
{
@ -3639,28 +3661,28 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(
* To support both (SELECT 1) AS expression in projection and (SELECT 1) as subquery in IN, do not use
* alias table because in alias table subquery could be evaluated as scalar.
*/
bool use_alias_table = !ignore_alias;
if (is_duplicated_alias || (allow_table_expression && IdentifierResolver::isSubqueryNodeType(node->getNodeType())))
use_alias_table = false;
// bool use_alias_table = !ignore_alias;
// if (is_duplicated_alias || (allow_table_expression && IdentifierResolver::isSubqueryNodeType(node->getNodeType())))
// use_alias_table = false;
if (!node_alias.empty() && use_alias_table)
{
/** Node could be potentially resolved by resolving other nodes.
* SELECT b, a as b FROM test_table;
*
* To resolve b we need to resolve a.
*/
auto it = scope.aliases.alias_name_to_expression_node.find(node_alias);
if (it != scope.aliases.alias_name_to_expression_node.end())
node = it->second->clone();
// if (!node_alias.empty() && use_alias_table)
// {
// /** Node could be potentially resolved by resolving other nodes.
// * SELECT b, a as b FROM test_table;
// *
// * To resolve b we need to resolve a.
// */
// auto it = scope.aliases.alias_name_to_expression_node.find(node_alias);
// if (it != scope.aliases.alias_name_to_expression_node.end())
// node = it->second->clone();
if (allow_lambda_expression)
{
it = scope.aliases.alias_name_to_lambda_node.find(node_alias);
if (it != scope.aliases.alias_name_to_lambda_node.end())
node = it->second->clone();
}
}
// if (allow_lambda_expression)
// {
// it = scope.aliases.alias_name_to_lambda_node.find(node_alias);
// if (it != scope.aliases.alias_name_to_lambda_node.end())
// node = it->second->clone();
// }
// }
scope.pushExpressionNode(node);
@ -3910,11 +3932,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(
if (!in_aggregate_function_scope)
{
LOG_DEBUG(getLogger("resolveExpressionNode"), "Looking for GROUP BY key:\n{}", node->dumpTree());
for (const auto * scope_ptr = &scope; scope_ptr; scope_ptr = scope_ptr->parent_scope)
{
auto it = scope_ptr->nullable_group_by_keys.find(node);
if (it != scope_ptr->nullable_group_by_keys.end())
{
LOG_DEBUG(getLogger("resolveExpressionNode"), "Found GROUP BY key");
node = it->node->clone();
node->convertToNullable();
break;
@ -4169,6 +4194,9 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR
scope.nullable_group_by_keys.insert(group_by_elem);
}
}
if (scope.group_by_use_nulls)
scope.identifier_lookup_to_resolve_result.clear();
}
/** Resolve interpolate columns nodes list.
@ -5667,6 +5695,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
/// Remove aliases from expression and lambda nodes
for (auto & node : scope.aliases.node_to_remove_aliases)
node->removeAlias();
for (auto & [_, node] : scope.aliases.alias_name_to_expression_node)
node->removeAlias();

View File

@ -24,6 +24,8 @@ struct ScopeAliases
std::unordered_set<QueryTreeNodePtr> nodes_with_duplicated_aliases;
std::vector<QueryTreeNodePtr> cloned_nodes_with_duplicated_aliases;
QueryTreeNodes node_to_remove_aliases;
/// Names which are aliases from ARRAY JOIN.
/// This is needed to properly qualify columns from matchers and avoid name collision.
std::unordered_set<std::string> array_join_aliases;

View File

@ -1 +1,11 @@
10
0 0
0 1
0 2
0 3
0 4
5 5
5 6
5 7
5 8
5 9

View File

@ -25,3 +25,11 @@ SELECT v FROM (
);
WITH (SELECT v FROM vecs_Float32 limit 1) AS a SELECT count(dp) FROM (SELECT dotProduct(a, v) AS dp FROM vecs_Float32);
WITH
t as (SELECT number + a as x FROM numbers(5))
SELECT 0 as a, x FROM t
UNION ALL
SELECT 5 as a, x FROM t
ORDER BY a, x
SETTINGS allow_experimental_analyzer = 1;