diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h index 3f6816696b4..922eaabe75c 100644 --- a/src/Analyzer/IQueryTreeNode.h +++ b/src/Analyzer/IQueryTreeNode.h @@ -182,7 +182,7 @@ public: struct ConvertToASTOptions { - /// Add _CAST if constant litral type is different from column type + /// Add _CAST if constant literal type is different from column type bool add_cast_for_constants = true; /// Identifiers are fully qualified (`database.table.column`), otherwise names are just column names (`column`) diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h index dec329b5403..62ddc06659c 100644 --- a/src/Analyzer/InDepthQueryTreeVisitor.h +++ b/src/Analyzer/InDepthQueryTreeVisitor.h @@ -188,7 +188,7 @@ private: if (auto * table_function_node = parent->as()) { if (child != table_function_node->getArgumentsNode()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctioNode is expected to have only one child node"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionNode is expected to have only one child node"); const auto & unresolved_indexes = table_function_node->getUnresolvedArgumentIndexes(); diff --git a/src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp b/src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp index 223ccf67380..29626c97d68 100644 --- a/src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp +++ b/src/Analyzer/Passes/RemoveUnusedProjectionColumnsPass.cpp @@ -1,11 +1,14 @@ #include +#include + #include #include #include #include #include -#include +#include +#include namespace DB { @@ -13,13 +16,15 @@ namespace DB namespace { -class CollectUsedColumnsVisitor : public InDepthQueryTreeVisitor +class CollectUsedColumnsVisitor : public InDepthQueryTreeVisitorWithContext { public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child) { - auto node_type = child->getNodeType(); - if 
(node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION) + if (isQueryOrUnionNode(child)) { subqueries_nodes_to_visit.insert(child); return false; @@ -28,9 +33,21 @@ public: return true; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto node_type = node->getNodeType(); + + if (node_type == QueryTreeNodeType::QUERY) + { + auto & query_node = node->as(); + auto table_expressions = extractTableExpressions(query_node.getJoinTree()); + for (const auto & table_expression : table_expressions) + if (isQueryOrUnionNode(table_expression)) + query_or_union_node_to_used_columns.emplace(table_expression, std::unordered_set()); + + return; + } + if (node_type != QueryTreeNodeType::COLUMN) return; @@ -39,10 +56,7 @@ public: auto column_source_node_type = column_source_node->getNodeType(); if (column_source_node_type == QueryTreeNodeType::QUERY || column_source_node_type == QueryTreeNodeType::UNION) - { - auto * column_source_node_ptr = column_source_node.get(); - query_or_union_node_to_used_columns[column_source_node_ptr].insert(column_node.getColumnName()); - } + query_or_union_node_to_used_columns[column_source_node].insert(column_node.getColumnName()); } void reset() @@ -52,17 +66,59 @@ public: } std::unordered_set subqueries_nodes_to_visit; - std::unordered_map> query_or_union_node_to_used_columns; + std::unordered_map> query_or_union_node_to_used_columns; }; +std::unordered_set convertUsedColumnNamesToUsedProjectionIndexes(const QueryTreeNodePtr & query_or_union_node, const std::unordered_set & used_column_names) +{ + std::unordered_set result; + + auto * union_node = query_or_union_node->as(); + auto * query_node = query_or_union_node->as(); + + const auto & projection_columns = query_node ? 
query_node->getProjectionColumns() : union_node->computeProjectionColumns(); + size_t projection_columns_size = projection_columns.size(); + + for (size_t i = 0; i < projection_columns_size; ++i) + { + const auto & projection_column = projection_columns[i]; + if (used_column_names.contains(projection_column.name)) + result.insert(i); + } + + return result; +} + +/// Adds to the used set every projection index that must not be removed: aggregate functions when the query has no GROUP BY, and expressions that contain arrayJoin. For a UNION node this is applied to each of its queries. +void updateUsedProjectionIndexes(const QueryTreeNodePtr & query_or_union_node, std::unordered_set & used_projection_columns_indexes) +{ + if (auto * union_node = query_or_union_node->as()) + { + for (auto & query_node : union_node->getQueries().getNodes()) + updateUsedProjectionIndexes(query_node, used_projection_columns_indexes); + return; + } + + const auto & query_node = query_or_union_node->as(); + const auto & projection_nodes = query_node.getProjection().getNodes(); + size_t projection_nodes_size = projection_nodes.size(); + + for (size_t i = 0; i < projection_nodes_size; ++i) + { + const auto & projection_node = projection_nodes[i]; + if ((!query_node.hasGroupBy() && hasAggregateFunctionNodes(projection_node)) || hasFunctionNode(projection_node, "arrayJoin")) + used_projection_columns_indexes.insert(i); + } +} + } -void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr) +void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { std::vector nodes_to_visit; nodes_to_visit.push_back(query_tree_node); - CollectUsedColumnsVisitor visitor; + CollectUsedColumnsVisitor visitor(std::move(context)); while (!nodes_to_visit.empty()) { @@ -73,10 +129,17 @@ void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr query_tree_node, Co for (auto & [query_or_union_node, used_columns] : visitor.query_or_union_node_to_used_columns) { + auto used_projection_indexes =
convertUsedColumnNamesToUsedProjectionIndexes(query_or_union_node, used_columns); + updateUsedProjectionIndexes(query_or_union_node, used_projection_indexes); + + /// Keep at least 1 column if used columns are empty + if (used_projection_indexes.empty()) + used_projection_indexes.insert(0); + if (auto * union_node = query_or_union_node->as()) - union_node->removeUnusedProjectionColumns(used_columns); + union_node->removeUnusedProjectionColumns(used_projection_indexes); else if (auto * query_node = query_or_union_node->as()) - query_node->removeUnusedProjectionColumns(used_columns); + query_node->removeUnusedProjectionColumns(used_projection_indexes); } for (const auto & subquery_node_to_visit : visitor.subqueries_nodes_to_visit) diff --git a/src/Analyzer/QueryNode.cpp b/src/Analyzer/QueryNode.cpp index 529631f045d..738b1ac62e8 100644 --- a/src/Analyzer/QueryNode.cpp +++ b/src/Analyzer/QueryNode.cpp @@ -57,14 +57,6 @@ void QueryNode::resolveProjectionColumns(NamesAndTypes projection_columns_value) void QueryNode::removeUnusedProjectionColumns(const std::unordered_set & used_projection_columns) { auto & projection_nodes = getProjection().getNodes(); - - if (used_projection_columns.empty()) - { - /// Keep at least 1 column if used columns are empty - projection_nodes.erase(projection_nodes.begin() + 1, projection_nodes.end()); - projection_columns.erase(projection_columns.begin() + 1, projection_columns.end()); - } - size_t projection_columns_size = projection_columns.size(); size_t write_index = 0; @@ -85,14 +77,6 @@ void QueryNode::removeUnusedProjectionColumns(const std::unordered_set & used_projection_columns_indexes) { auto & projection_nodes = getProjection().getNodes(); - - if (used_projection_columns_indexes.empty()) - { - /// Keep at least 1 column if used columns are empty - projection_nodes.erase(projection_nodes.begin() + 1, projection_nodes.end()); - projection_columns.erase(projection_columns.begin() + 1, projection_columns.end()); - } - size_t projection_columns_size =
projection_columns.size(); size_t write_index = 0; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 9b3a77e7e93..a3c9813f3d0 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -152,6 +152,17 @@ void makeUniqueColumnNamesInBlock(Block & block) } } +bool isQueryOrUnionNode(const IQueryTreeNode * node) +{ + auto node_type = node->getNodeType(); + return node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION; +} + +bool isQueryOrUnionNode(const QueryTreeNodePtr & node) +{ + return isQueryOrUnionNode(node.get()); +} + QueryTreeNodePtr buildCastFunction(const QueryTreeNodePtr & expression, const DataTypePtr & type, const ContextPtr & context, diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 2bf12f01d8a..060dc7d8bc0 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -27,6 +27,12 @@ std::string getGlobalInFunctionNameForLocalInFunctionName(const std::string & fu /// Add unique suffix to names of duplicate columns in block void makeUniqueColumnNamesInBlock(Block & block); +/// Returns true, if node has type QUERY or UNION +bool isQueryOrUnionNode(const IQueryTreeNode * node); + +/// Returns true, if node has type QUERY or UNION +bool isQueryOrUnionNode(const QueryTreeNodePtr & node); + /** Build cast function that cast expression into type. * If resolve = true, then result cast function is resolved during build, otherwise * result cast function is not resolved during build. 
diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 833b04d5648..e5283f55d61 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,5 +1,4 @@ 00223_shard_distributed_aggregation_memory_efficient -00593_union_all_assert_columns_removed 00717_merge_and_distributed 00725_memory_tracking 01062_pm_all_join_with_block_continuation @@ -11,12 +10,9 @@ 01244_optimize_distributed_group_by_sharding_key 01268_mv_scalars 01268_shard_avgweighted -01287_max_execution_speed -01455_shard_leaf_max_rows_bytes_to_read 01495_subqueries_in_with_statement 01560_merge_distributed_join 01584_distributed_buffer_cannot_find_column -01586_columns_pruning 01624_soft_constraints 01656_test_query_log_factories_info 01739_index_hint