Backport #63962 to 24.3: Analyzer: Fix COLUMNS resolve

This commit is contained in:
robot-clickhouse 2024-05-22 21:05:24 +00:00
parent 825d8725c4
commit e40a5e06ba
4 changed files with 50 additions and 3 deletions

View File

@ -4613,6 +4613,36 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher(
std::unordered_set<std::string> table_expression_column_names_to_skip;
QueryTreeNodesWithNames result;
if (matcher_node_typed.getMatcherType() == MatcherNodeType::COLUMNS_LIST)
{
auto identifiers = matcher_node_typed.getColumnsIdentifiers();
result.reserve(identifiers.size());
for (const auto & identifier : identifiers)
{
auto resolve_result = tryResolveIdentifier(IdentifierLookup{identifier, IdentifierLookupContext::EXPRESSION}, scope);
if (!resolve_result.isResolved())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown identifier '{}' inside COLUMNS matcher. In scope {}",
identifier.getFullName(), scope.dump());
// TODO: Introduce IdentifierLookupContext::COLUMN and get rid of this check
auto * resolved_column = resolve_result.resolved_identifier->as<ColumnNode>();
if (!resolved_column)
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Identifier '{}' inside COLUMNS matcher must resolve into a column, but got {}. In scope {}",
identifier.getFullName(),
resolve_result.resolved_identifier->getNodeTypeName(),
scope.scope_node->formatASTForErrorMessage());
result.emplace_back(resolve_result.resolved_identifier, resolved_column->getColumnName());
}
return result;
}
result.resize(matcher_node_typed.getColumnsIdentifiers().size());
for (auto & table_expression : table_expressions_stack)
{
bool table_expression_in_resolve_process = scope.table_expressions_in_resolve_process.contains(table_expression.get());
@ -4780,8 +4810,6 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher(
table_expressions_column_nodes_with_names_stack.push_back(std::move(matched_column_nodes_with_names));
}
QueryTreeNodesWithNames result;
for (auto & table_expression_column_nodes_with_names : table_expressions_column_nodes_with_names_stack)
{
for (auto && table_expression_column_node_with_name : table_expression_column_nodes_with_names)

View File

@ -61,6 +61,11 @@ CREATE TABLE github_events
)
ENGINE = MergeTree ORDER BY (event_type, repo_name, created_at);
with top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ), last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in (select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ), last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week group by repo_name order by count_last_week desc ), last_month as ( select repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) select d.repo_name, columns(count) from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name;
-- Same CTE names and three-way join as the single-line query directly above; the
-- final select here passes the string literal 'count' to columns(...), whereas
-- the query above passes the bare identifier count.
-- NOTE(review): presumably the quoted form is matched as a regular expression
-- against column names while the unquoted form is resolved as an identifier list
-- — confirm against the COLUMNS expression documentation.
with
top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ),
last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in (select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ),
last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week group by repo_name order by count_last_week desc ),
last_month as ( select repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc )
select d.repo_name, columns('count') from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name;
DROP TABLE github_events;

View File

@ -0,0 +1 @@
4 3

View File

@ -0,0 +1,13 @@
-- Exercises COLUMNS(...) given an explicit list of identifiers, both when every
-- identifier names a column of the table and when one of them does not.
CREATE TABLE test
(
foo String,
bar String,
)
ENGINE = MergeTree()
ORDER BY (foo, bar);
-- One row: foo = 'foo' (3 characters), bar = 'bar1' (4 characters).
INSERT INTO test VALUES ('foo', 'bar1');
-- The identifiers are written as (bar, foo), the reverse of the declaration
-- order above, and length is applied to each matched column.
-- NOTE(review): the accompanying reference output "4 3" suggests the result
-- follows the order written inside COLUMNS(...) — confirm.
SELECT COLUMNS(bar, foo) APPLY (length) FROM test;
-- xyz is not declared in the table; the trailing hint tells the test runner to
-- expect the server to reject this query with UNKNOWN_IDENTIFIER.
SELECT COLUMNS(bar, foo, xyz) APPLY (length) FROM test; -- { serverError UNKNOWN_IDENTIFIER }