diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 1f81ac54078..907a732493d 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6651,6 +6651,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table if (column_default && column_default->kind == ColumnDefaultKind::Alias) { auto alias_expression = buildQueryTree(column_default->expression, scope.context); + alias_expression = buildCastFunction(alias_expression, column_name_and_type.type, scope.context, false /*resolve*/); auto column_node = std::make_shared(column_name_and_type, std::move(alias_expression), table_expression_node); column_name_to_column_node.emplace(column_name_and_type.name, column_node); alias_columns_to_resolve.emplace_back(column_name_and_type.name, column_node); @@ -6683,9 +6684,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table alias_column_resolve_scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - auto & resolved_expression = alias_column_to_resolve->getExpression(); - if (!resolved_expression->getResultType()->equals(*alias_column_to_resolve->getResultType())) - resolved_expression = buildCastFunction(resolved_expression, alias_column_to_resolve->getResultType(), scope.context, true); + column_name_to_column_node = std::move(alias_column_resolve_scope.column_name_to_column_node); column_name_to_column_node[alias_column_to_resolve_name] = alias_column_to_resolve; } diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index 75b0e710fbe..21739298036 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -121,12 +121,7 @@ Block getHeaderForProcessingStage( auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(left_table_expression); const auto & query_context = query_info.planner_context->getQueryContext(); - - NamesAndTypes columns; - const auto & column_name_to_column = table_expression_data.getColumnNameToColumn(); - for (const auto & column_name : table_expression_data.getSelectedColumnsNames()) - columns.push_back(column_name_to_column.at(column_name)); - + auto columns = table_expression_data.getColumns(); auto new_query_node = buildSubqueryToReadColumnsFromTableExpression(columns, left_table_expression, query_context); query = new_query_node->toAST(); } diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 385381f1355..78a7c7074c3 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -29,13 +29,34 @@ namespace class CollectSourceColumnsVisitor : public InDepthQueryTreeVisitor { public: - explicit CollectSourceColumnsVisitor(PlannerContextPtr & planner_context_, bool keep_alias_columns_ = true) + explicit CollectSourceColumnsVisitor(PlannerContext & planner_context_) : planner_context(planner_context_) - , keep_alias_columns(keep_alias_columns_) {} void visitImpl(QueryTreeNodePtr & node) { + /// Special case for USING clause which contains references to ALIAS columns. + /// We can not modify such ColumnNode. + if (auto * join_node = node->as()) + { + if (!join_node->isUsingJoinExpression()) + return; + + auto & using_list = join_node->getJoinExpression()->as(); + for (auto & using_element : using_list) + { + auto & column_node = using_element->as(); + /// This list contains column nodes from left and right tables. + auto & columns_from_subtrees = column_node.getExpressionOrThrow()->as().getNodes(); + + /// Visit left table column node. + visitUsingColumn(columns_from_subtrees[0]); + /// Visit right table column node. + visitUsingColumn(columns_from_subtrees[1]); + } + return; + } + auto * column_node = node->as(); if (!column_node) return; @@ -51,55 +72,22 @@ public: /// JOIN using expression if (column_node->hasExpression() && column_source_node_type == QueryTreeNodeType::JOIN) - { - auto & columns_from_subtrees = column_node->getExpression()->as().getNodes(); - if (columns_from_subtrees.size() != 2) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected two columns in JOIN using expression for column {}", column_node->dumpTree()); - - visit(columns_from_subtrees[0]); - visit(columns_from_subtrees[1]); return; - } - auto & table_expression_data = planner_context->getOrCreateTableExpressionData(column_source_node); + auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_source_node); - if (isAliasColumn(node)) + if (column_node->hasExpression() && column_source_node_type != QueryTreeNodeType::ARRAY_JOIN) { - /// Column is an ALIAS column with expression + /// Replace ALIAS column with expression bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName()); if (!column_already_exists) { - CollectSourceColumnsVisitor visitor_for_alias_column(planner_context); - /// While we are processing expression of ALIAS columns we should not add source columns to selected. - /// See also comment for `select_added_columns` - visitor_for_alias_column.select_added_columns = false; - visitor_for_alias_column.keep_alias_columns = keep_alias_columns; - visitor_for_alias_column.visit(column_node->getExpression()); - - if (!keep_alias_columns) - { - /// For PREWHERE we can just replace ALIAS column with it's expression, - /// because ActionsDAG for PREWHERE applied right on top of table expression - /// and cannot affect subqueries or other table expressions. - node = column_node->getExpression(); - return; - } - - auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node); - - ActionsDAGPtr alias_column_actions_dag = std::make_shared(); - PlannerActionsVisitor actions_visitor(planner_context, false); - auto outputs = actions_visitor.visit(alias_column_actions_dag, column_node->getExpression()); - if (outputs.size() != 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected single output in actions dag for alias column {}. Actual {}", column_node->dumpTree(), outputs.size()); - const auto & column_name = column_node->getColumnName(); - const auto & alias_node = alias_column_actions_dag->addAlias(*outputs[0], column_name); - alias_column_actions_dag->addOrReplaceInOutputs(alias_node); - table_expression_data.addAliasColumn(column_node->getColumn(), column_identifier, alias_column_actions_dag, select_added_columns); + auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); + table_expression_data.addAliasColumnName(column_node->getColumnName(), column_identifier); } + node = column_node->getExpression(); + visitImpl(node); return; } @@ -114,58 +102,45 @@ public: bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName()); if (column_already_exists) - { - /// Column may be added when we collected data for ALIAS column - /// But now we see it directly in the query, so make sure it's marked as selected - if (select_added_columns) - table_expression_data.markSelectedColumn(column_node->getColumnName()); return; + + auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); + table_expression_data.addColumn(column_node->getColumn(), column_identifier); + } + + static bool needChildVisit(const QueryTreeNodePtr & parent, const QueryTreeNodePtr & child_node) + { + if (auto * join_node = parent->as()) + { + if (join_node->getJoinExpression() == child_node && join_node->isUsingJoinExpression()) + return false; } - - auto column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(node); - table_expression_data.addColumn(column_node->getColumn(), column_identifier, select_added_columns); - } - - static bool isAliasColumn(const QueryTreeNodePtr & node) - { - const auto * column_node = node->as(); - if (!column_node || !column_node->hasExpression()) - return false; - const auto & column_source = column_node->getColumnSourceOrNull(); - if (!column_source) - return false; - return column_source->getNodeType() != QueryTreeNodeType::JOIN && - column_source->getNodeType() != QueryTreeNodeType::ARRAY_JOIN; - } - - static bool needChildVisit(const QueryTreeNodePtr & parent_node, const QueryTreeNodePtr & child_node) - { auto child_node_type = child_node->getNodeType(); - return !(child_node_type == QueryTreeNodeType::QUERY || - child_node_type == QueryTreeNodeType::UNION || - isAliasColumn(parent_node)); - } - - void setKeepAliasColumns(bool keep_alias_columns_) - { - keep_alias_columns = keep_alias_columns_; + return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION); } private: - PlannerContextPtr & planner_context; - /// Replace ALIAS columns with their expressions or register them in table expression data. - /// Usually we can replace them when we build some "local" actions DAG - /// (for example Row Policy or PREWHERE) that is applied on top of the table expression. - /// In other cases, we keep ALIAS columns as ColumnNode with an expression child node, - /// and handle them in the Planner by inserting ActionsDAG to compute them after reading from storage. - bool keep_alias_columns = true; + void visitUsingColumn(QueryTreeNodePtr & node) + { + auto & column_node = node->as(); + if (column_node.hasExpression()) + { + auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_node.getColumnSource()); + bool column_already_exists = table_expression_data.hasColumn(column_node.getColumnName()); + if (column_already_exists) + return; - /// Flag `select_added_columns` indicates if we should mark column as explicitly selected. - /// For example, for table with columns (a Int32, b ALIAS a+1) and query SELECT b FROM table - /// Column `b` is selected explicitly by user, but not `a` (that is also read though). - /// Distinguishing such columns is important for checking access rights for ALIAS columns. - bool select_added_columns = true; + auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node); + table_expression_data.addAliasColumnName(column_node.getColumnName(), column_identifier); + + visitImpl(column_node.getExpressionOrThrow()); + } + else + visitImpl(node); + } + + PlannerContext & planner_context; }; class CollectPrewhereTableExpressionVisitor : public ConstInDepthQueryTreeVisitor @@ -299,7 +274,7 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr } } - CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context); + CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context); for (auto & node : query_node_typed.getChildren()) { if (!node || node == query_node_typed.getPrewhere()) @@ -325,26 +300,21 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr } auto & table_expression_data = planner_context->getOrCreateTableExpressionData(prewhere_table_expression); - const auto & read_column_names = table_expression_data.getColumnNames(); - NameSet required_column_names_without_prewhere(read_column_names.begin(), read_column_names.end()); - const auto & selected_column_names = table_expression_data.getSelectedColumnsNames(); - required_column_names_without_prewhere.insert(selected_column_names.begin(), selected_column_names.end()); + const auto & column_names = table_expression_data.getColumnNames(); + NameSet required_column_names_without_prewhere(column_names.begin(), column_names.end()); - collect_source_columns_visitor.setKeepAliasColumns(false); collect_source_columns_visitor.visit(query_node_typed.getPrewhere()); auto prewhere_actions_dag = std::make_shared(); - QueryTreeNodePtr query_tree_node = query_node_typed.getPrewhere(); - PlannerActionsVisitor visitor(planner_context, false /*use_column_identifier_as_action_node_name*/); - auto expression_nodes = visitor.visit(prewhere_actions_dag, query_tree_node); + auto expression_nodes = visitor.visit(prewhere_actions_dag, query_node_typed.getPrewhere()); if (expression_nodes.size() != 1) throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Invalid PREWHERE. Expected single boolean expression. In query {}", query_node->formatASTForErrorMessage()); - prewhere_actions_dag->getOutputs().push_back(expression_nodes.back()); + prewhere_actions_dag->getOutputs().push_back(expression_nodes[0]); for (const auto & prewhere_input_node : prewhere_actions_dag->getInputs()) if (required_column_names_without_prewhere.contains(prewhere_input_node->result_name)) @@ -354,9 +324,9 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr } } -void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context, bool keep_alias_columns) +void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context) { - CollectSourceColumnsVisitor collect_source_columns_visitor(planner_context, keep_alias_columns); + CollectSourceColumnsVisitor collect_source_columns_visitor(*planner_context); collect_source_columns_visitor.visit(expression_node); } diff --git a/src/Planner/CollectTableExpressionData.h b/src/Planner/CollectTableExpressionData.h index b0cebc15682..ed3f0ff7a47 100644 --- a/src/Planner/CollectTableExpressionData.h +++ b/src/Planner/CollectTableExpressionData.h @@ -19,6 +19,6 @@ void collectTableExpressionData(QueryTreeNodePtr & query_node, PlannerContextPtr * * ALIAS table column nodes are registered in table expression data and replaced in query tree with inner alias expression. */ -void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context, bool keep_alias_columns = true); +void collectSourceColumns(QueryTreeNodePtr & expression_node, PlannerContextPtr & planner_context); } diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index c417d463c73..511e9396a35 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -451,7 +451,6 @@ private: std::unordered_map node_to_node_name; const PlannerContextPtr planner_context; ActionNodeNameHelper action_node_name_helper; - bool use_column_identifier_as_action_node_name; }; PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, @@ -459,7 +458,6 @@ PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAGPtr actions_dag, bool use_column_identifier_as_action_node_name_) : planner_context(planner_context_) , action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_) - , use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_) { actions_stack.emplace_back(std::move(actions_dag), nullptr); } @@ -505,8 +503,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi { auto column_node_name = action_node_name_helper.calculateActionNodeName(node); const auto & column_node = node->as(); - if (column_node.hasExpression() && !use_column_identifier_as_action_node_name) - return visitImpl(column_node.getExpression()); + Int64 actions_stack_size = static_cast(actions_stack.size() - 1); for (Int64 i = actions_stack_size; i >= 0; --i) { diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 7b3fb0c5c91..59da88f4e45 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -86,7 +86,7 @@ namespace /// Check if current user has privileges to SELECT columns from table /// Throws an exception if access to any column from `column_names` is not granted /// If `column_names` is empty, check access to any columns and return names of accessible columns -NameSet checkAccessRights(const TableNode & table_node, const Names & column_names, const ContextPtr & query_context) +NameSet checkAccessRights(const TableNode & table_node, Names & column_names, const ContextPtr & query_context) { /// StorageDummy is created on preliminary stage, ignore access check for it. if (typeid_cast(table_node.getStorage().get())) @@ -353,7 +353,9 @@ void prepareBuildQueryPlanForTableExpression(const QueryTreeNodePtr & table_expr NameSet columns_names_allowed_to_select; if (table_node) { - const auto & column_names_with_aliases = table_expression_data.getSelectedColumnsNames(); + auto column_names_with_aliases = columns_names; + const auto & alias_columns_names = table_expression_data.getAliasColumnsNames(); + column_names_with_aliases.insert(column_names_with_aliases.end(), alias_columns_names.begin(), alias_columns_names.end()); columns_names_allowed_to_select = checkAccessRights(*table_node, column_names_with_aliases, query_context); } @@ -862,28 +864,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres max_block_size, max_streams); - const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); - if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns) - { - ActionsDAGPtr merged_alias_columns_actions_dag = std::make_shared(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); - ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag->getInputs(); - - for (const auto & [column_name, alias_column_actions_dag] : alias_column_expressions) - { - const auto & current_outputs = alias_column_actions_dag->getOutputs(); - action_dag_outputs.insert(action_dag_outputs.end(), current_outputs.begin(), current_outputs.end()); - merged_alias_columns_actions_dag->mergeNodes(std::move(*alias_column_actions_dag)); - } - - for (const auto * output_node : action_dag_outputs) - merged_alias_columns_actions_dag->addOrReplaceInOutputs(*output_node); - merged_alias_columns_actions_dag->removeUnusedActions(false); - - auto alias_column_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(merged_alias_columns_actions_dag)); - alias_column_step->setStepDescription("Compute alias columns"); - query_plan.addStep(std::move(alias_column_step)); - } - for (const auto & filter_info_and_description : where_filters) { const auto & [filter_info, description] = filter_info_and_description; @@ -927,8 +907,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres else { /// Create step which reads from empty source if storage has no data. - const auto & column_names = table_expression_data.getSelectedColumnsNames(); - auto source_header = storage_snapshot->getSampleBlockForColumns(column_names); + auto source_header = storage_snapshot->getSampleBlockForColumns(table_expression_data.getColumnNames()); Pipe pipe(std::make_shared(source_header)); auto read_from_pipe = std::make_unique(std::move(pipe)); read_from_pipe->setStepDescription("Read from NullSource"); @@ -1045,6 +1024,57 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP plan_to_add_cast.addStep(std::move(cast_join_columns_step)); } +/// Actions to calculate table columns that have a functional representation (ALIASes and subcolumns) +/// and used in USING clause of JOIN expression. +struct UsingAliasKeyActions +{ + UsingAliasKeyActions( + const ColumnsWithTypeAndName & left_plan_output_columns, + const ColumnsWithTypeAndName & right_plan_output_columns + ) + : left_alias_columns_keys(std::make_shared(left_plan_output_columns)) + , right_alias_columns_keys(std::make_shared(right_plan_output_columns)) + {} + + void addLeftColumn(QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) + { + addColumnImpl(left_alias_columns_keys, node, plan_output_columns, planner_context); + } + + void addRightColumn(QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) + { + addColumnImpl(right_alias_columns_keys, node, plan_output_columns, planner_context); + } + + ActionsDAGPtr getLeftActions() + { + left_alias_columns_keys->projectInput(); + return std::move(left_alias_columns_keys); + } + + ActionsDAGPtr getRightActions() + { + right_alias_columns_keys->projectInput(); + return std::move(right_alias_columns_keys); + } + +private: + void addColumnImpl(ActionsDAGPtr & alias_columns_keys, QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context) + { + auto & column_node = node->as(); + if (column_node.hasExpression()) + { + auto dag = buildActionsDAGFromExpressionNode(column_node.getExpressionOrThrow(), plan_output_columns, planner_context); + const auto & left_inner_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(node); + dag->addOrReplaceInOutputs(dag->addAlias(*dag->getOutputs().front(), left_inner_column_identifier)); + alias_columns_keys->mergeInplace(std::move(*dag)); + } + } + + ActionsDAGPtr left_alias_columns_keys; + ActionsDAGPtr right_alias_columns_keys; +}; + JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_expression, JoinTreeQueryPlan left_join_tree_query_plan, JoinTreeQueryPlan right_join_tree_query_plan, @@ -1113,6 +1143,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ if (join_node.isUsingJoinExpression()) { + UsingAliasKeyActions using_alias_key_actions{left_plan_output_columns, right_plan_output_columns}; + auto & join_node_using_columns_list = join_node.getJoinExpression()->as(); for (auto & join_node_using_node : join_node_using_columns_list.getNodes()) { @@ -1122,9 +1154,13 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ auto & left_inner_column_node = inner_columns_list.getNodes().at(0); auto & left_inner_column = left_inner_column_node->as(); + using_alias_key_actions.addLeftColumn(left_inner_column_node, left_plan_output_columns, planner_context); + auto & right_inner_column_node = inner_columns_list.getNodes().at(1); auto & right_inner_column = right_inner_column_node->as(); + using_alias_key_actions.addRightColumn(right_inner_column_node, right_plan_output_columns, planner_context); + const auto & join_node_using_column_node_type = join_node_using_column_node.getColumnType(); if (!left_inner_column.getColumnType()->equals(*join_node_using_column_node_type)) { @@ -1138,6 +1174,14 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ right_plan_column_name_to_cast_type.emplace(right_inner_column_identifier, join_node_using_column_node_type); } } + + auto left_alias_columns_keys_step = std::make_unique(left_plan.getCurrentDataStream(), using_alias_key_actions.getLeftActions()); + left_alias_columns_keys_step->setStepDescription("Actions for left table alias column keys"); + left_plan.addStep(std::move(left_alias_columns_keys_step)); + + auto right_alias_columns_keys_step = std::make_unique(right_plan.getCurrentDataStream(), using_alias_key_actions.getRightActions()); + right_alias_columns_keys_step->setStepDescription("Actions for right table alias column keys"); + right_plan.addStep(std::move(right_alias_columns_keys_step)); } auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map & plan_column_name_to_cast_type) diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index 9ab7a8e64fe..20c4f05ea7e 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -55,7 +55,7 @@ public: /// Return true if column with name exists, false otherwise bool hasColumn(const std::string & column_name) const { - return column_name_to_column.contains(column_name); + return alias_columns_names.contains(column_name) || column_name_to_column.contains(column_name); } /** Add column in table expression data. @@ -63,40 +63,37 @@ public: * * Logical error exception is thrown if column already exists. */ - void addColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, bool is_selected_column = true) + void addColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) { if (hasColumn(column.name)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Column with name {} already exists", column.name); - column_names.push_back(column.name); - addColumnImpl(column, column_identifier, is_selected_column); + addColumnImpl(column, column_identifier); } - /// Add alias column - void addAliasColumn(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, ActionsDAGPtr actions_dag, bool is_selected_column = true) + /** Add column if it does not exists in table expression data. + * Column identifier must be created using global planner context. + */ + void addColumnIfNotExists(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) { - alias_column_expressions.emplace(column.name, std::move(actions_dag)); - addColumnImpl(column, column_identifier, is_selected_column); + if (hasColumn(column.name)) + return; + + addColumnImpl(column, column_identifier); } - /// Mark existing column as selected - void markSelectedColumn(const std::string & column_name) + /// Add alias column name + void addAliasColumnName(const std::string & column_name, const ColumnIdentifier & column_identifier) { - auto [_, inserted] = selected_column_names_set.emplace(column_name); - if (inserted) - selected_column_names.push_back(column_name); + alias_columns_names.insert(column_name); + + column_name_to_column_identifier.emplace(column_name, column_identifier); } - /// Get columns that are requested from table expression, including ALIAS columns - const Names & getSelectedColumnsNames() const + /// Get alias columns names + const NameSet & getAliasColumnsNames() const { - return selected_column_names; - } - - /// Get ALIAS columns names mapped to expressions - const std::unordered_map & getAliasColumnExpressions() const - { - return alias_column_expressions; + return alias_columns_names; } /// Get column name to column map @@ -105,7 +102,7 @@ public: return column_name_to_column; } - /// Get column names that are read from table expression + /// Get column names const Names & getColumnNames() const { return column_names; @@ -122,6 +119,23 @@ public: return result; } + ColumnIdentifiers getColumnIdentifiers() const + { + ColumnIdentifiers result; + result.reserve(column_identifier_to_column_name.size()); + + for (const auto & [column_identifier, _] : column_identifier_to_column_name) + result.push_back(column_identifier); + + return result; + } + + /// Get column name to column identifier map + const ColumnNameToColumnIdentifier & getColumnNameToIdentifier() const + { + return column_name_to_column_identifier; + } + /// Get column identifier to column name map const ColumnNameToColumnIdentifier & getColumnIdentifierToColumnName() const { @@ -145,6 +159,18 @@ public: return it->second; } + /** Get column for column name. + * Null is returned if there are no column for column name. + */ + const NameAndTypePair * getColumnOrNull(const std::string & column_name) const + { + auto it = column_name_to_column.find(column_name); + if (it == column_name_to_column.end()) + return nullptr; + + return &it->second; + } + /** Get column identifier for column name. * Exception is thrown if there are no column identifier for column name. */ @@ -174,6 +200,24 @@ public: return &it->second; } + /** Get column name for column identifier. + * Exception is thrown if there are no column name for column identifier. + */ + const std::string & getColumnNameOrThrow(const ColumnIdentifier & column_identifier) const + { + auto it = column_identifier_to_column_name.find(column_identifier); + if (it == column_identifier_to_column_name.end()) + { + auto column_identifiers = getColumnIdentifiers(); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Column name for column identifier {} does not exists. There are only column identifiers: {}", + column_identifier, + fmt::join(column_identifiers.begin(), column_identifiers.end(), ", ")); + } + + return it->second; + } + /** Get column name for column identifier. * Null is returned if there are no column name for column identifier. */ @@ -252,36 +296,23 @@ public: } private: - void addColumnImpl(const NameAndTypePair & column, const ColumnIdentifier & column_identifier, bool add_to_selected_columns) + void addColumnImpl(const NameAndTypePair & column, const ColumnIdentifier & column_identifier) { - if (add_to_selected_columns) - markSelectedColumn(column.name); - + column_names.push_back(column.name); column_name_to_column.emplace(column.name, column); column_name_to_column_identifier.emplace(column.name, column_identifier); column_identifier_to_column_name.emplace(column_identifier, column.name); } - /// Set of columns that are physically read from table expression - /// In case of ALIAS columns it contains source column names that are used to calculate alias - /// This source column may be not used by user + /// Valid for table, table function, array join, query, union nodes Names column_names; - /// Set of columns that are SELECTed from table expression - /// It may contain ALIAS columns. - /// Mainly it's used to determine access to which columns to check - /// For example user may have an access to column `a ALIAS x + y` but not to `x` and `y` - /// In that case we can read `x` and `y` and calculate `a`, but not return `x` and `y` to user - Names selected_column_names; - /// To deduplicate columns in `selected_column_names` - NameSet selected_column_names_set; - - /// Expression to calculate ALIAS columns - std::unordered_map alias_column_expressions; - /// Valid for table, table function, array join, query, union nodes ColumnNameToColumn column_name_to_column; + /// Valid only for table node + NameSet alias_columns_names; + /// Valid for table, table function, array join, query, union nodes ColumnNameToColumnIdentifier column_name_to_column_identifier; diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index bd0b831ee58..5f5875b8019 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -469,19 +469,12 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, NameSet table_expression_required_names_without_filter) { const auto & query_context = planner_context->getQueryContext(); + auto filter_query_tree = buildQueryTree(filter_expression, query_context); QueryAnalysisPass query_analysis_pass(table_expression); query_analysis_pass.run(filter_query_tree, query_context); - return buildFilterInfo(std::move(filter_query_tree), table_expression, planner_context, std::move(table_expression_required_names_without_filter)); -} - -FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, - const QueryTreeNodePtr & table_expression, - PlannerContextPtr & planner_context, - NameSet table_expression_required_names_without_filter) -{ if (table_expression_required_names_without_filter.empty()) { auto & table_expression_data = planner_context->getTableExpressionDataOrThrow(table_expression); @@ -489,7 +482,7 @@ FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, table_expression_required_names_without_filter.insert(table_expression_names.begin(), table_expression_names.end()); } - collectSourceColumns(filter_query_tree, planner_context, false /*keep_alias_columns*/); + collectSourceColumns(filter_query_tree, planner_context); collectSets(filter_query_tree, *planner_context); auto filter_actions_dag = std::make_shared(); diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index bf45770552b..3060b1c2711 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -89,11 +89,6 @@ FilterDAGInfo buildFilterInfo(ASTPtr filter_expression, PlannerContextPtr & planner_context, NameSet table_expression_required_names_without_filter = {}); -FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, - const QueryTreeNodePtr & table_expression, - PlannerContextPtr & planner_context, - NameSet table_expression_required_names_without_filter = {}); - ASTPtr parseAdditionalResultFilter(const Settings & settings); } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 331bd46f909..c7b9eb72d4d 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1434,13 +1434,8 @@ void ReadFromMergeTree::applyFilters(ActionDAGNodes added_filter_nodes) if (query_info.planner_context) { const auto & table_expression_data = query_info.planner_context->getTableExpressionDataOrThrow(query_info.table_expression); - const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions(); for (const auto & [column_identifier, column_name] : table_expression_data.getColumnIdentifierToColumnName()) { - /// ALIAS columns cannot be used in the filter expression without being calculated in ActionsDAG, - /// so they should not be added to the input nodes. - if (alias_column_expressions.contains(column_name)) - continue; const auto & column = table_expression_data.getColumnOrThrow(column_name); node_name_to_input_node_column.emplace(column_identifier, ColumnWithTypeAndName(column.type, column_name)); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 7370bd3ab8f..92e7dcdf4f2 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -744,32 +744,6 @@ StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery( namespace { -class ReplaseAliasColumnsVisitor : public InDepthQueryTreeVisitor -{ - static QueryTreeNodePtr getColumnNodeAliasExpression(const QueryTreeNodePtr & node) - { - const auto * column_node = node->as(); - if (!column_node || !column_node->hasExpression()) - return nullptr; - - const auto & column_source = column_node->getColumnSourceOrNull(); - if (!column_source || column_source->getNodeType() == QueryTreeNodeType::JOIN - || column_source->getNodeType() == QueryTreeNodeType::ARRAY_JOIN) - return nullptr; - - auto column_expression = column_node->getExpression(); - column_expression->setAlias(column_node->getColumnName()); - return column_expression; - } - -public: - void visitImpl(QueryTreeNodePtr & node) - { - if (auto column_expression = getColumnNodeAliasExpression(node)) - node = column_expression; - } -}; - QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, const StorageSnapshotPtr & distributed_storage_snapshot, const StorageID & remote_storage_id, @@ -822,8 +796,6 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, replacement_table_expression->setAlias(query_info.table_expression->getAlias()); auto query_tree_to_modify = query_info.query_tree->cloneAndReplace(query_info.table_expression, std::move(replacement_table_expression)); - ReplaseAliasColumnsVisitor replase_alias_columns_visitor; - replase_alias_columns_visitor.visit(query_tree_to_modify); return buildQueryTreeForShard(query_info.planner_context, query_tree_to_modify); } diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 8f72fcd4050..796ca6bca22 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -4,3 +4,4 @@ test_distributed_type_object/test.py::test_distributed_type_object test_merge_table_over_distributed/test.py::test_global_in test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster +test_select_access_rights/test_main.py::test_alias_columns diff --git a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py index c09a80cea06..b620e88e7eb 100644 --- a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py +++ b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py @@ -41,7 +41,7 @@ def started_cluster(): CREATE TABLE mydb.filtered_table2 (a UInt8, b UInt8, c UInt8, d UInt8) ENGINE MergeTree ORDER BY a; INSERT INTO mydb.filtered_table2 values (0, 0, 0, 0), (1, 2, 3, 4), (4, 3, 2, 1), (0, 0, 6, 0); - CREATE TABLE mydb.filtered_table3 (a UInt8, b UInt8, bb ALIAS b + 1, c UInt16 ALIAS a + bb - 1) ENGINE MergeTree ORDER BY a; + CREATE TABLE mydb.filtered_table3 (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; INSERT INTO mydb.filtered_table3 values (0, 0), (0, 1), (1, 0), (1, 1); CREATE TABLE mydb.`.filtered_table4` (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 8260be78e82..98653bf6106 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -60,7 +60,7 @@ def started_cluster(): CREATE TABLE mydb.filtered_table2 (a UInt8, b UInt8, c UInt8, d UInt8) ENGINE MergeTree ORDER BY a; INSERT INTO mydb.filtered_table2 values (0, 0, 0, 0), (1, 2, 3, 4), (4, 3, 2, 1), (0, 0, 6, 0); - CREATE TABLE mydb.filtered_table3 (a UInt8, b UInt8, bb ALIAS b + 1, c UInt16 ALIAS a + bb - 1) ENGINE MergeTree ORDER BY a; + CREATE TABLE mydb.filtered_table3 (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; INSERT INTO mydb.filtered_table3 values (0, 0), (0, 1), (1, 0), (1, 1); CREATE TABLE mydb.`.filtered_table4` (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; @@ -113,7 +113,6 @@ def test_smoke(): assert node.query("SELECT a FROM mydb.filtered_table3") == TSV([[0], [1]]) assert node.query("SELECT b FROM mydb.filtered_table3") == TSV([[1], [0]]) - assert node.query("SELECT bb FROM mydb.filtered_table3") == TSV([[2], [1]]) assert node.query("SELECT c FROM mydb.filtered_table3") == TSV([[1], [1]]) assert node.query("SELECT a + b FROM mydb.filtered_table3") == TSV([[1], [1]]) assert node.query("SELECT a FROM mydb.filtered_table3 WHERE c = 1") == TSV( diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference index 2c62e278050..a5a71560d00 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -55,33 +55,33 @@ Header: a2 String Header: __table1.a2 String __table1.k UInt64 __table4.d2 String - Expression (DROP unused columns after JOIN) + Expression ((Actions for left table alias column keys + DROP unused columns after JOIN)) Header: __table1.a2 String __table1.k UInt64 Join (JOIN FillRightFirst) Header: __table1.a2 String __table1.k UInt64 - Expression (DROP unused columns after JOIN) + Expression ((Actions for left table alias column keys + DROP unused columns after JOIN)) Header: __table1.a2 String __table1.k UInt64 Join (JOIN FillRightFirst) Header: __table1.a2 String __table1.k UInt64 - Expression (Change column names to column identifiers) + Expression ((Actions for left table alias column keys + Change column names to column identifiers)) Header: __table1.a2 String __table1.k UInt64 ReadFromMemoryStorage Header: a2 String k UInt64 - Expression (Change column names to column identifiers) + Expression ((Actions for right table alias column keys + Change column names to column identifiers)) Header: __table2.k UInt64 ReadFromMemoryStorage Header: k UInt64 - Expression (Change column names to column identifiers) + Expression ((Actions for right table alias column keys + Change column names to column identifiers)) Header: __table3.k UInt64 ReadFromMemoryStorage Header: k UInt64 - Expression (Change column names to column identifiers) + Expression ((Actions for right table alias column keys + Change column names to column identifiers)) Header: __table4.d2 String __table4.k UInt64 ReadFromMemoryStorage diff --git a/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference b/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference index b867a31dcc3..883966ce6b5 100644 --- a/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference +++ b/tests/queries/0_stateless/02911_support_alias_column_in_indices.reference @@ -14,13 +14,13 @@ Expression ((Projection + Before ORDER BY)) Parts: 1/1 Granules: 1/1 Expression ((Project names + Projection)) - Filter ((WHERE + (Change column names to column identifiers + Compute alias columns))) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (02911_support_alias_column_in_indices.test1) Indexes: PrimaryKey Keys: c - Condition: (plus(c, 1) in [11, +Inf)) + Condition: (_CAST(plus(c, \'UInt64\'), 1) in [11, +Inf)) Parts: 1/2 Granules: 1/2 Skip @@ -44,17 +44,12 @@ Expression ((Projection + Before ORDER BY)) Parts: 1/1 Granules: 1/1 Expression ((Project names + Projection)) - Filter ((WHERE + (Change column names to column identifiers + Compute alias columns))) + Filter ((WHERE + Change column names to column identifiers)) ReadFromMergeTree (02911_support_alias_column_in_indices.test2) Indexes: PrimaryKey Keys: c - Condition: (plus(plus(c, 1), 1) in [16, +Inf)) + Condition: (_CAST(plus(_CAST(plus(c, \'UInt64\'), 1), \'UInt64\'), 1) in [16, +Inf)) Parts: 1/2 Granules: 1/2 - Skip - Name: i - Description: minmax GRANULARITY 1 - Parts: 1/1 - Granules: 1/1