Updated implementation

This commit is contained in:
Maksim Kita 2023-11-10 16:29:19 +03:00
parent 1562e24232
commit 5f009e99f6
7 changed files with 95 additions and 36 deletions

View File

@ -182,7 +182,7 @@ public:
struct ConvertToASTOptions
{
/// Add _CAST if constant litral type is different from column type
/// Add _CAST if constant literal type is different from column type
bool add_cast_for_constants = true;
/// Identifiers are fully qualified (`database.table.column`), otherwise names are just column names (`column`)

View File

@ -188,7 +188,7 @@ private:
if (auto * table_function_node = parent->as<TableFunctionNode>())
{
if (child != table_function_node->getArgumentsNode())
throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctioNode is expected to have only one child node");
throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionNode is expected to have only one child node");
const auto & unresolved_indexes = table_function_node->getUnresolvedArgumentIndexes();

View File

@ -1,11 +1,14 @@
#include <Analyzer/Passes/RemoveUnusedProjectionColumnsPass.h>
#include <Functions/FunctionFactory.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/SortNode.h>
#include <Functions/FunctionFactory.h>
#include <Analyzer/AggregationUtils.h>
#include <Analyzer/Utils.h>
namespace DB
{
@ -13,13 +16,15 @@ namespace DB
namespace
{
class CollectUsedColumnsVisitor : public InDepthQueryTreeVisitor<CollectUsedColumnsVisitor>
class CollectUsedColumnsVisitor : public InDepthQueryTreeVisitorWithContext<CollectUsedColumnsVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<CollectUsedColumnsVisitor>;
using Base::Base;
bool needChildVisit(QueryTreeNodePtr &, QueryTreeNodePtr & child)
{
auto node_type = child->getNodeType();
if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION)
if (isQueryOrUnionNode(child))
{
subqueries_nodes_to_visit.insert(child);
return false;
@ -28,9 +33,21 @@ public:
return true;
}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
auto node_type = node->getNodeType();
if (node_type == QueryTreeNodeType::QUERY)
{
auto & query_node = node->as<QueryNode &>();
auto table_expressions = extractTableExpressions(query_node.getJoinTree());
for (const auto & table_expression : table_expressions)
if (isQueryOrUnionNode(table_expression))
query_or_union_node_to_used_columns.emplace(table_expression, std::unordered_set<std::string>());
return;
}
if (node_type != QueryTreeNodeType::COLUMN)
return;
@ -39,10 +56,7 @@ public:
auto column_source_node_type = column_source_node->getNodeType();
if (column_source_node_type == QueryTreeNodeType::QUERY || column_source_node_type == QueryTreeNodeType::UNION)
{
auto * column_source_node_ptr = column_source_node.get();
query_or_union_node_to_used_columns[column_source_node_ptr].insert(column_node.getColumnName());
}
query_or_union_node_to_used_columns[column_source_node].insert(column_node.getColumnName());
}
void reset()
@ -52,17 +66,59 @@ public:
}
std::unordered_set<QueryTreeNodePtr> subqueries_nodes_to_visit;
std::unordered_map<IQueryTreeNode *, std::unordered_set<std::string>> query_or_union_node_to_used_columns;
std::unordered_map<QueryTreeNodePtr, std::unordered_set<std::string>> query_or_union_node_to_used_columns;
};
/// Map the names of used columns to their positions in the projection of a QUERY or UNION node.
/// The result contains index `i` for every projection column whose name is in `used_column_names`.
std::unordered_set<size_t> convertUsedColumnNamesToUsedProjectionIndexes(const QueryTreeNodePtr & query_or_union_node, const std::unordered_set<std::string> & used_column_names)
{
    auto * query_node = query_or_union_node->as<QueryNode>();
    auto * union_node = query_or_union_node->as<UnionNode>();

    /// Projection columns are taken from the QueryNode when the node is one, otherwise from the UnionNode.
    const auto & projection_columns = query_node ? query_node->getProjectionColumns() : union_node->computeProjectionColumns();

    std::unordered_set<size_t> used_indexes;
    size_t position = 0;

    for (const auto & projection_column : projection_columns)
    {
        if (used_column_names.contains(projection_column.name))
            used_indexes.insert(position);

        ++position;
    }

    return used_indexes;
}
/** Mark projection columns that must be kept even if nothing references them.
  *
  * A projection column is added to `used_projection_columns_indexes` if at least one of the following holds:
  *  - it contains an aggregate function and the query has no GROUP BY;
  *  - it contains the `arrayJoin` function.
  * (Presumably because both affect the number of rows produced by the subquery - confirm with pass authors.)
  *
  * For a UNION node the rule is applied to every query of the union and all indexes are accumulated
  * into the same set.
  */
void updateUsedProjectionIndexes(const QueryTreeNodePtr & query_or_union_node, std::unordered_set<size_t> & used_projection_columns_indexes)
{
    if (auto * union_node = query_or_union_node->as<UnionNode>())
    {
        for (auto & query_node : union_node->getQueries().getNodes())
            updateUsedProjectionIndexes(query_node, used_projection_columns_indexes);

        return;
    }

    const auto & query_node = query_or_union_node->as<const QueryNode &>();
    const auto & projection_nodes = query_node.getProjection().getNodes();
    size_t projection_nodes_size = projection_nodes.size();

    for (size_t i = 0; i < projection_nodes_size; ++i)
    {
        const auto & projection_node = projection_nodes[i];

        /// The two reasons are independent: either one alone is enough to keep the column.
        if ((!query_node.hasGroupBy() && hasAggregateFunctionNodes(projection_node)) || hasFunctionNode(projection_node, "arrayJoin"))
            used_projection_columns_indexes.insert(i);
    }
}
}
void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr)
void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
std::vector<QueryTreeNodePtr> nodes_to_visit;
nodes_to_visit.push_back(query_tree_node);
CollectUsedColumnsVisitor visitor;
CollectUsedColumnsVisitor visitor(std::move(context));
while (!nodes_to_visit.empty())
{
@ -73,10 +129,16 @@ void RemoveUnusedProjectionColumnsPass::run(QueryTreeNodePtr query_tree_node, Co
for (auto & [query_or_union_node, used_columns] : visitor.query_or_union_node_to_used_columns)
{
auto used_projection_indexes = convertUsedColumnNamesToUsedProjectionIndexes(query_or_union_node, used_columns);
updateUsedProjectionIndexes(query_or_union_node, used_projection_indexes);
/// Keep at least 1 column if used columns are empty
used_projection_indexes.insert(0);
if (auto * union_node = query_or_union_node->as<UnionNode>())
union_node->removeUnusedProjectionColumns(used_columns);
union_node->removeUnusedProjectionColumns(used_projection_indexes);
else if (auto * query_node = query_or_union_node->as<QueryNode>())
query_node->removeUnusedProjectionColumns(used_columns);
query_node->removeUnusedProjectionColumns(used_projection_indexes);
}
for (const auto & subquery_node_to_visit : visitor.subqueries_nodes_to_visit)

View File

@ -57,14 +57,6 @@ void QueryNode::resolveProjectionColumns(NamesAndTypes projection_columns_value)
void QueryNode::removeUnusedProjectionColumns(const std::unordered_set<std::string> & used_projection_columns)
{
auto & projection_nodes = getProjection().getNodes();
if (used_projection_columns.empty())
{
/// Keep at least 1 column if used columns are empty
projection_nodes.erase(projection_nodes.begin() + 1, projection_nodes.end());
projection_columns.erase(projection_columns.begin() + 1, projection_columns.end());
}
size_t projection_columns_size = projection_columns.size();
size_t write_index = 0;
@ -85,14 +77,6 @@ void QueryNode::removeUnusedProjectionColumns(const std::unordered_set<std::stri
void QueryNode::removeUnusedProjectionColumns(const std::unordered_set<size_t> & used_projection_columns_indexes)
{
auto & projection_nodes = getProjection().getNodes();
if (used_projection_columns_indexes.empty())
{
/// Keep at least 1 column if used columns are empty
projection_nodes.erase(projection_nodes.begin() + 1, projection_nodes.end());
projection_columns.erase(projection_columns.begin() + 1, projection_columns.end());
}
size_t projection_columns_size = projection_columns.size();
size_t write_index = 0;

View File

@ -152,6 +152,17 @@ void makeUniqueColumnNamesInBlock(Block & block)
}
}
/// Check whether the node type is QUERY or UNION. The pointer is dereferenced without a null check.
bool isQueryOrUnionNode(const IQueryTreeNode * node)
{
    const auto type = node->getNodeType();

    if (type == QueryTreeNodeType::QUERY)
        return true;

    return type == QueryTreeNodeType::UNION;
}
bool isQueryOrUnionNode(const QueryTreeNodePtr & node)
{
return isQueryOrUnionNode(node.get());
}
QueryTreeNodePtr buildCastFunction(const QueryTreeNodePtr & expression,
const DataTypePtr & type,
const ContextPtr & context,

View File

@ -27,6 +27,12 @@ std::string getGlobalInFunctionNameForLocalInFunctionName(const std::string & fu
/// Add unique suffix to names of duplicate columns in block
void makeUniqueColumnNamesInBlock(Block & block);
/// Returns true if node has type QUERY or UNION.
/// The node must not be null: its type is read without a null check.
bool isQueryOrUnionNode(const IQueryTreeNode * node);
/// Returns true if node has type QUERY or UNION.
/// Convenience overload for QueryTreeNodePtr; the pointer must not be empty.
bool isQueryOrUnionNode(const QueryTreeNodePtr & node);
/** Build cast function that cast expression into type.
* If resolve = true, then result cast function is resolved during build, otherwise
* result cast function is not resolved during build.

View File

@ -1,5 +1,4 @@
00223_shard_distributed_aggregation_memory_efficient
00593_union_all_assert_columns_removed
00717_merge_and_distributed
00725_memory_tracking
01062_pm_all_join_with_block_continuation
@ -11,12 +10,9 @@
01244_optimize_distributed_group_by_sharding_key
01268_mv_scalars
01268_shard_avgweighted
01287_max_execution_speed
01455_shard_leaf_max_rows_bytes_to_read
01495_subqueries_in_with_statement
01560_merge_distributed_join
01584_distributed_buffer_cannot_find_column
01586_columns_pruning
01624_soft_constraints
01656_test_query_log_factories_info
01739_index_hint