Added UNION support

This commit is contained in:
Maksim Kita 2022-08-23 11:50:02 +02:00
parent 0da0019634
commit 85193ede42
15 changed files with 825 additions and 159 deletions

View File

@ -35,6 +35,7 @@ const char * toString(QueryTreeNodeType type)
case QueryTreeNodeType::QUERY: return "QUERY";
case QueryTreeNodeType::ARRAY_JOIN: return "ARRAY_JOIN";
case QueryTreeNodeType::JOIN: return "JOIN";
case QueryTreeNodeType::UNION: return "UNION";
}
}

View File

@ -39,7 +39,8 @@ enum class QueryTreeNodeType
TABLE_FUNCTION,
QUERY,
ARRAY_JOIN,
JOIN
JOIN,
UNION
};
/// Convert query tree node type to string

View File

@ -45,6 +45,7 @@
#include <Analyzer/QueryNode.h>
#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/JoinNode.h>
#include <Analyzer/UnionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryTreeBuilder.h>
@ -174,7 +175,6 @@ namespace ErrorCodes
* TODO: Lookup functions arrayReduce(sum, [1, 2, 3]);
* TODO: SELECT (compound_expression).*, (compound_expression).COLUMNS are not supported on parser level.
* TODO: SELECT a.b.c.*, a.b.c.COLUMNS. Qualified matcher where identifier size is greater than 2 are not supported on parser level.
* TODO: UNION
* TODO: JOIN support SELF JOIN with MergeTree. JOIN support matchers.
* TODO: WINDOW functions
* TODO: Table expression modifiers final, sample_size, sample_offset
@ -736,19 +736,25 @@ public:
{
IdentifierResolveScope scope(node, nullptr /*parent_scope*/);
if (node->getNodeType() == QueryTreeNodeType::QUERY)
auto node_type = node->getNodeType();
if (node_type == QueryTreeNodeType::UNION)
{
resolveUnion(node, scope);
}
else if (node_type == QueryTreeNodeType::QUERY)
{
resolveQuery(node, scope);
}
else if (node->getNodeType() == QueryTreeNodeType::LIST)
else if (node_type == QueryTreeNodeType::LIST)
{
resolveExpressionNodeList(node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
}
else if (node->getNodeType() == QueryTreeNodeType::FUNCTION)
else if (node_type == QueryTreeNodeType::FUNCTION)
{
resolveExpressionNode(node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
}
else if (node->getNodeType() == QueryTreeNodeType::LAMBDA)
else if (node_type == QueryTreeNodeType::LAMBDA)
{
resolveLambda(node, {}, scope);
}
@ -814,6 +820,8 @@ private:
void resolveQuery(const QueryTreeNodePtr & query_node, IdentifierResolveScope & scope);
void resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope);
/// Query analyzer context
ContextPtr context;
@ -1250,11 +1258,12 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTable(const IdentifierLo
{
auto * table_node = table_expression_node->as<TableNode>();
auto * query_node = table_expression_node->as<QueryNode>();
auto * union_node = table_expression_node->as<UnionNode>();
auto * table_function_node = table_expression_node->as<TableFunctionNode>();
if (!table_node && !table_function_node && !query_node)
if (!table_node && !table_function_node && !query_node && !union_node)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Unexpected table expression. Expected table, table function or query node. Actual {}. In scope {}",
"Unexpected table expression. Expected table, table function, query or union node. Actual {}. In scope {}",
table_expression_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
@ -1273,12 +1282,12 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTable(const IdentifierLo
database_name = table_storage_id.database_name;
table_expression_name = table_storage_id.getFullNameNotQuoted();
}
else if (query_node)
else if (query_node || union_node)
{
table_name = query_node->getCTEName();
table_name = query_node ? query_node->getCTEName() : union_node->getCTEName();
if (query_node->hasAlias())
table_expression_name = query_node->getAlias();
if (table_expression_node->hasAlias())
table_expression_name = table_expression_node->getAlias();
}
else if (table_function_node)
{
@ -1555,15 +1564,13 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTreeNode(const Ident
switch (join_tree_node_type)
{
case QueryTreeNodeType::JOIN:
{
return tryResolveIdentifierFromJoin(identifier_lookup, join_tree_node, scope);
}
case QueryTreeNodeType::ARRAY_JOIN:
{
return tryResolveIdentifierFromArrayJoin(identifier_lookup, join_tree_node, scope);
}
case QueryTreeNodeType::QUERY:
[[fallthrough]];
case QueryTreeNodeType::UNION:
[[fallthrough]];
case QueryTreeNodeType::TABLE:
[[fallthrough]];
case QueryTreeNodeType::TABLE_FUNCTION:
@ -1580,7 +1587,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTreeNode(const Ident
default:
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Scope FROM section expected table, table function, query, join or array join. Actual {}. In scope {}",
"Scope FROM section expected table, table function, query, union, join or array join. Actual {}. In scope {}",
join_tree_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
}
@ -1609,10 +1616,10 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTree(const Identifie
return resolved_identifier;
auto * query_scope_node = scope.scope_node->as<QueryNode>();
if (!query_scope_node || !query_scope_node->getFrom())
if (!query_scope_node || !query_scope_node->getJoinTree())
return {};
const auto & join_tree_node = query_scope_node->getFrom();
const auto & join_tree_node = query_scope_node->getJoinTree();
return tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_tree_node, scope);
}
@ -1949,7 +1956,7 @@ QueryTreeNodePtr QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node,
}
/// If there is no parent scope that has tables, or the query scope does not have a FROM section
if (!scope_query_node || !scope_query_node->getFrom())
if (!scope_query_node || !scope_query_node->getJoinTree())
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Unqualified matcher {} cannot be resolved. There are no table sources. In scope {}",
@ -1959,9 +1966,9 @@ QueryTreeNodePtr QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node,
NamesAndTypesList initial_matcher_columns;
auto * from_query_node = scope_query_node->getFrom()->as<QueryNode>();
auto * from_table_node = scope_query_node->getFrom()->as<TableNode>();
auto * from_table_function_node = scope_query_node->getFrom()->as<TableFunctionNode>();
auto * from_query_node = scope_query_node->getJoinTree()->as<QueryNode>();
auto * from_table_node = scope_query_node->getJoinTree()->as<TableNode>();
auto * from_table_function_node = scope_query_node->getJoinTree()->as<TableFunctionNode>();
if (from_query_node)
{
@ -1996,14 +2003,14 @@ QueryTreeNodePtr QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node,
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Unqualified matcher resolve unexpected FROM section {}",
scope_query_node->getFrom()->formatASTForErrorMessage());
scope_query_node->getJoinTree()->formatASTForErrorMessage());
}
for (auto & column : initial_matcher_columns)
{
const auto & column_name = column.name;
if (matcher_node_typed.isMatchingColumn(column_name))
matcher_expression_nodes.push_back(std::make_shared<ColumnNode>(column, scope_query_node->getFrom()));
matcher_expression_nodes.push_back(std::make_shared<ColumnNode>(column, scope_query_node->getJoinTree()));
}
}
@ -2828,6 +2835,16 @@ void QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierRes
break;
}
case QueryTreeNodeType::UNION:
{
IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/);
resolveUnion(node, subquery_scope);
if (!allow_table_expression)
evaluateScalarSubquery(node);
break;
}
case QueryTreeNodeType::ARRAY_JOIN:
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
@ -2971,6 +2988,11 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod
scope.table_expressions_in_resolve_process.insert(current_join_tree_node.get());
break;
}
case QueryTreeNodeType::UNION:
{
scope.table_expressions_in_resolve_process.insert(current_join_tree_node.get());
break;
}
case QueryTreeNodeType::TABLE_FUNCTION:
{
scope.table_expressions_in_resolve_process.insert(current_join_tree_node.get());
@ -2999,7 +3021,7 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod
default:
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Query FROM section expected table, table function, query, ARRAY JOIN or JOIN. Actual {} {}. In scope {}",
"Query FROM section expected table, table function, query, UNION, ARRAY JOIN or JOIN. Actual {} {}. In scope {}",
current_join_tree_node->getNodeTypeName(),
current_join_tree_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
@ -3013,11 +3035,12 @@ void QueryAnalyzer::initializeTableExpressionColumns(QueryTreeNodePtr & table_ex
{
auto * table_node = table_expression_node->as<TableNode>();
auto * query_node = table_expression_node->as<QueryNode>();
auto * union_node = table_expression_node->as<UnionNode>();
auto * table_function_node = table_expression_node->as<TableFunctionNode>();
if (!table_node && !table_function_node && !query_node)
if (!table_node && !table_function_node && !query_node && !union_node)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Unexpected table expression. Expected table, table function or query node. Actual {}. In scope {}",
"Unexpected table expression. Expected table, table function, query or union node. Actual {}. In scope {}",
table_expression_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
@ -3084,9 +3107,9 @@ void QueryAnalyzer::initializeTableExpressionColumns(QueryTreeNodePtr & table_ex
storage_columns.column_name_to_column_node = std::move(column_name_to_column_node);
}
else if (query_node)
else if (query_node || union_node)
{
auto column_names_and_types = query_node->computeProjectionColumns();
auto column_names_and_types = query_node ? query_node->computeProjectionColumns() : union_node->computeProjectionColumns();
storage_columns.column_name_to_column_node.reserve(column_names_and_types.size());
for (const auto & column_name_and_type : column_names_and_types)
@ -3138,6 +3161,12 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
resolveQuery(join_tree_node, subquery_scope);
break;
}
case QueryTreeNodeType::UNION:
{
IdentifierResolveScope subquery_scope(join_tree_node, &scope);
resolveUnion(join_tree_node, subquery_scope);
break;
}
case QueryTreeNodeType::TABLE_FUNCTION:
{
auto & table_function_node = join_tree_node->as<TableFunctionNode &>();
@ -3178,8 +3207,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
{
if (argument_node->getNodeType() == QueryTreeNodeType::MATCHER)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Matcher as table function argument is not supported {}. In scope {}",
join_tree_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
@ -3341,7 +3369,11 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
}
}
if (isTableExpression(join_tree_node.get()))
auto join_tree_node_type = join_tree_node->getNodeType();
if (join_tree_node_type == QueryTreeNodeType::QUERY ||
join_tree_node_type == QueryTreeNodeType::UNION ||
join_tree_node_type == QueryTreeNodeType::TABLE ||
join_tree_node_type == QueryTreeNodeType::TABLE_FUNCTION)
initializeTableExpressionColumns(join_tree_node, scope);
add_table_expression_alias_into_scope(join_tree_node);
@ -3419,17 +3451,17 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
*/
scope.use_identifier_lookup_to_result_cache = false;
if (query_node_typed.getFrom())
if (query_node_typed.getJoinTree())
{
TableExpressionsAliasVisitor::Data table_expressions_visitor_data{scope};
TableExpressionsAliasVisitor table_expressions_visitor(table_expressions_visitor_data);
table_expressions_visitor.visit(query_node_typed.getFrom());
table_expressions_visitor.visit(query_node_typed.getJoinTree());
initializeQueryJoinTreeNode(query_node_typed.getFrom(), scope);
initializeQueryJoinTreeNode(query_node_typed.getJoinTree(), scope);
scope.alias_name_to_table_expression_node.clear();
resolveQueryJoinTreeNode(query_node_typed.getFrom(), scope, visitor);
resolveQueryJoinTreeNode(query_node_typed.getJoinTree(), scope, visitor);
}
scope.use_identifier_lookup_to_result_cache = true;
@ -3506,13 +3538,38 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
}
}
void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, IdentifierResolveScope & scope)
{
auto & union_node_typed = union_node->as<UnionNode &>();
auto & queries_nodes = union_node_typed.getQueries().getNodes();
for (auto & query_node : queries_nodes)
{
IdentifierResolveScope subquery_scope(query_node, &scope /*parent_scope*/);
auto query_node_type = query_node->getNodeType();
if (query_node_type == QueryTreeNodeType::QUERY)
{
resolveQuery(query_node, subquery_scope);
}
else if (query_node_type == QueryTreeNodeType::UNION)
{
resolveUnion(query_node, subquery_scope);
}
else
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"UNION unsupported node {}. In scope {}",
query_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
}
}
}
}
/** Run query analysis for the root node of a query tree.
  *
  * The root must be a QUERY or UNION node, both of which QueryAnalyzer::resolve handles.
  * Previously only QUERY was accepted here, so a query tree built from a top-level UNION
  * was rejected before it could reach the analyzer.
  *
  * Throws UNSUPPORTED_METHOD for any other root node type.
  */
void QueryAnalysisPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
    const auto root_node_type = query_tree_node->getNodeType();
    if (root_node_type != QueryTreeNodeType::QUERY && root_node_type != QueryTreeNodeType::UNION)
        throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "QueryAnalysis pass requires query or union node");

    QueryAnalyzer analyzer(std::move(context));
    analyzer.resolve(query_tree_node);
}

View File

@ -63,10 +63,10 @@ String QueryNode::getName() const
buffer << " SELECT ";
buffer << getProjection().getName();
if (getFrom())
if (getJoinTree())
{
buffer << " FROM ";
buffer << getFrom()->getName();
buffer << getJoinTree()->getName();
}
if (getPrewhere())
@ -107,10 +107,10 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
buffer << std::string(indent + 2, ' ') << "PROJECTION\n";
getProjection().dumpTreeImpl(buffer, format_state, indent + 4);
if (getFrom())
if (getJoinTree())
{
buffer << '\n' << std::string(indent + 2, ' ') << "JOIN TREE\n";
getFrom()->dumpTreeImpl(buffer, format_state, indent + 4);
getJoinTree()->dumpTreeImpl(buffer, format_state, indent + 4);
}
if (getPrewhere())
@ -129,7 +129,7 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
{
const auto & rhs_typed = assert_cast<const QueryNode &>(rhs);
return is_subquery == rhs_typed.is_subquery && cte_name == rhs_typed.cte_name;
return is_subquery == rhs_typed.is_subquery && is_cte == rhs_typed.is_cte && cte_name == rhs_typed.cte_name;
}
void QueryNode::updateTreeHashImpl(HashState & state) const
@ -151,7 +151,7 @@ ASTPtr QueryNode::toASTImpl() const
select_query->setExpression(ASTSelectQuery::Expression::SELECT, children[projection_child_index]->toAST());
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
addTableExpressionIntoTablesInSelectQuery(tables_in_select_query_ast, children[from_child_index]);
addTableExpressionIntoTablesInSelectQuery(tables_in_select_query_ast, getJoinTree());
select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables_in_select_query_ast));
if (getPrewhere())

View File

@ -87,14 +87,14 @@ public:
return children[projection_child_index];
}
const QueryTreeNodePtr & getFrom() const
const QueryTreeNodePtr & getJoinTree() const
{
return children[from_child_index];
return children[join_tree_child_index];
}
QueryTreeNodePtr & getFrom()
QueryTreeNodePtr & getJoinTree()
{
return children[from_child_index];
return children[join_tree_child_index];
}
const QueryTreeNodePtr & getPrewhere() const
@ -150,7 +150,7 @@ private:
static constexpr size_t with_child_index = 0;
static constexpr size_t projection_child_index = 1;
static constexpr size_t from_child_index = 2;
static constexpr size_t join_tree_child_index = 2;
static constexpr size_t prewhere_child_index = 3;
static constexpr size_t where_child_index = 4;
static constexpr size_t group_by_child_index = 5;

View File

@ -8,6 +8,7 @@
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/ParserSelectWithUnionQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectIntersectExceptQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
@ -33,6 +34,7 @@
#include <Analyzer/QueryNode.h>
#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/JoinNode.h>
#include <Analyzer/UnionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Databases/IDatabase.h>
@ -54,6 +56,9 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
namespace
{
class QueryTreeBuilder : public WithContext
{
public:
@ -65,17 +70,21 @@ public:
}
private:
QueryTreeNodePtr getSelectWithUnionExpression(const ASTPtr & select_with_union_query, bool is_subquery, const std::string & cte_name) const;
QueryTreeNodePtr buildSelectOrUnionExpression(const ASTPtr & select_or_union_query, bool is_subquery, const std::string & cte_name) const;
QueryTreeNodePtr getSelectExpression(const ASTPtr & select_query, bool is_subquery, const std::string & cte_name) const;
QueryTreeNodePtr buildSelectWithUnionExpression(const ASTPtr & select_with_union_query, bool is_subquery, const std::string & cte_name) const;
QueryTreeNodePtr getExpressionList(const ASTPtr & expression_list) const;
QueryTreeNodePtr buildSelectIntersectExceptQuery(const ASTPtr & select_intersect_except_query, bool is_subquery, const std::string & cte_name) const;
QueryTreeNodePtr getExpression(const ASTPtr & expression) const;
QueryTreeNodePtr buildSelectExpression(const ASTPtr & select_query, bool is_subquery, const std::string & cte_name) const;
QueryTreeNodePtr getFromNode(const ASTPtr & tables_in_select_query) const;
QueryTreeNodePtr buildExpressionList(const ASTPtr & expression_list) const;
ColumnTransformersNodes getColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index) const;
QueryTreeNodePtr buildExpression(const ASTPtr & expression) const;
QueryTreeNodePtr buildJoinTree(const ASTPtr & tables_in_select_query) const;
ColumnTransformersNodes buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index) const;
ASTPtr query;
QueryTreeNodePtr query_tree_node;
@ -86,47 +95,94 @@ QueryTreeBuilder::QueryTreeBuilder(ASTPtr query_, ContextPtr context_)
: WithContext(context_)
, query(query_->clone())
{
if (query->as<ASTSelectWithUnionQuery>())
query_tree_node = getSelectWithUnionExpression(query, false /*is_subquery*/, {} /*cte_name*/);
else if (query->as<ASTSelectQuery>())
query_tree_node = getSelectExpression(query, false /*is_subquery*/, {} /*cte_name*/);
if (query->as<ASTSelectWithUnionQuery>() ||
query->as<ASTSelectIntersectExceptQuery>() ||
query->as<ASTSelectQuery>())
query_tree_node = buildSelectOrUnionExpression(query, false /*is_subquery*/, {} /*cte_name*/);
else if (query->as<ASTExpressionList>())
query_tree_node = getExpressionList(query);
query_tree_node = buildExpressionList(query);
else
query_tree_node = getExpression(query);
query_tree_node = buildExpression(query);
}
QueryTreeNodePtr QueryTreeBuilder::getSelectWithUnionExpression(const ASTPtr & select_with_union_query, bool is_subquery, const std::string & cte_name) const
/** Build a query tree node from an AST that is either a SELECT, a SELECT with UNION,
  * or a SELECT with INTERSECT / EXCEPT, by dispatching on the concrete AST type.
  *
  * `is_subquery` and `cte_name` are forwarded unchanged to the selected builder.
  * Throws UNSUPPORTED_METHOD if the AST is none of the three supported types.
  */
QueryTreeNodePtr QueryTreeBuilder::buildSelectOrUnionExpression(const ASTPtr & select_or_union_query, bool is_subquery, const std::string & cte_name) const
{
    /// The checks are performed in the same order as the AST types are listed above.
    if (select_or_union_query->as<ASTSelectWithUnionQuery>())
        return buildSelectWithUnionExpression(select_or_union_query, is_subquery, cte_name);

    if (select_or_union_query->as<ASTSelectIntersectExceptQuery>())
        return buildSelectIntersectExceptQuery(select_or_union_query, is_subquery, cte_name);

    if (select_or_union_query->as<ASTSelectQuery>())
        return buildSelectExpression(select_or_union_query, is_subquery, cte_name);

    throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "UNION query {} is not supported", select_or_union_query->formatForErrorMessage());
}
QueryTreeNodePtr QueryTreeBuilder::buildSelectWithUnionExpression(const ASTPtr & select_with_union_query, bool is_subquery, const std::string & cte_name) const
{
auto & select_with_union_query_typed = select_with_union_query->as<ASTSelectWithUnionQuery &>();
auto & select_lists = select_with_union_query_typed.list_of_selects->as<ASTExpressionList &>();
if (select_lists.children.size() == 1)
return buildSelectOrUnionExpression(select_lists.children[0], is_subquery, cte_name);
auto union_node = std::make_shared<UnionNode>();
union_node->setIsSubquery(is_subquery);
union_node->setCTEName(cte_name);
union_node->setUnionMode(select_with_union_query_typed.union_mode);
union_node->setUnionModes(select_with_union_query_typed.list_of_modes);
union_node->setOriginalAST(select_with_union_query);
size_t select_lists_children_size = select_lists.children.size();
for (size_t i = 0; i < select_lists_children_size; ++i)
{
return getSelectExpression(select_with_union_query->children[0]->children[0], is_subquery, cte_name);
auto & select_list_node = select_lists.children[i];
QueryTreeNodePtr query_node = buildSelectOrUnionExpression(select_list_node, false /*is_subquery*/, {} /*cte_name*/);
union_node->getQueries().getNodes().push_back(std::move(query_node));
}
else
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "UNION is not supported");
// auto union_expression = UnionExpression::create(is_scalar_query);
// union_expression->getModes() = select_with_union_query_typed.list_of_modes;
// union_expression->getModesSet() = select_with_union_query_typed.set_of_modes;
// auto & select_expressions = union_expression->getSelectExpressions();
// select_expressions.reserve(select_lists.children.size());
// for (const auto & select : select_lists.children)
// {
// auto expression = getSelectExpression(select, false);
// select_expressions.emplace_back(std::move(expression));
// }
// return union_expression;
}
return union_node;
}
QueryTreeNodePtr QueryTreeBuilder::getSelectExpression(const ASTPtr & select_query, bool is_subquery, const std::string & cte_name) const
/** Build a query tree node from a SELECT ... INTERSECT / EXCEPT ... AST.
  *
  * If the AST contains a single select, it is built directly and `is_subquery` / `cte_name`
  * are applied to it. Otherwise a UnionNode is created:
  *  - its union mode is INTERSECT or EXCEPT depending on `final_operator`;
  *  - the same mode is repeated (number of selects - 1) times as the list of union modes;
  *  - every select is built as a child with is_subquery = false and an empty CTE name.
  *
  * Throws LOGICAL_ERROR if the AST contains no selects or if `final_operator` is neither
  * INTERSECT nor EXCEPT.
  */
QueryTreeNodePtr QueryTreeBuilder::buildSelectIntersectExceptQuery(const ASTPtr & select_intersect_except_query, bool is_subquery, const std::string & cte_name) const
{
    auto & select_intersect_except_query_typed = select_intersect_except_query->as<ASTSelectIntersectExceptQuery &>();
    auto select_lists = select_intersect_except_query_typed.getListOfSelects();

    /// Guard against an empty list: `select_lists.size() - 1` below would otherwise wrap around.
    if (select_lists.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR,
            "INTERSECT or EXCEPT query {} does not contain any SELECT",
            select_intersect_except_query->formatForErrorMessage());

    /// Dispatch on the child type instead of assuming a plain SELECT, the same way
    /// the children are built in the loop below.
    if (select_lists.size() == 1)
        return buildSelectOrUnionExpression(select_lists[0], is_subquery, cte_name);

    auto union_node = std::make_shared<UnionNode>();
    union_node->setIsSubquery(is_subquery);
    union_node->setCTEName(cte_name);

    if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT)
        union_node->setUnionMode(SelectUnionMode::INTERSECT);
    else if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::EXCEPT)
        union_node->setUnionMode(SelectUnionMode::EXCEPT);
    else
        throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION type is not initialized");

    /// One mode between every pair of adjacent selects.
    union_node->setUnionModes(SelectUnionModes(select_lists.size() - 1, union_node->getUnionMode()));
    union_node->setOriginalAST(select_intersect_except_query);

    for (const auto & select_list_node : select_lists)
    {
        QueryTreeNodePtr query_node = buildSelectOrUnionExpression(select_list_node, false /*is_subquery*/, {} /*cte_name*/);
        union_node->getQueries().getNodes().push_back(std::move(query_node));
    }

    return union_node;
}
QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_query, bool is_subquery, const std::string & cte_name) const
{
const auto & select_query_typed = select_query->as<ASTSelectQuery &>();
auto current_query_tree = std::make_shared<QueryNode>();
@ -135,7 +191,7 @@ QueryTreeNodePtr QueryTreeBuilder::getSelectExpression(const ASTPtr & select_que
current_query_tree->setIsCTE(!cte_name.empty());
current_query_tree->setCTEName(cte_name);
current_query_tree->getFrom() = getFromNode(select_query_typed.tables());
current_query_tree->getJoinTree() = buildJoinTree(select_query_typed.tables());
current_query_tree->setOriginalAST(select_query);
auto select_with_list = select_query_typed.with();
@ -144,7 +200,7 @@ QueryTreeNodePtr QueryTreeBuilder::getSelectExpression(const ASTPtr & select_que
auto & select_with_list_typed = select_with_list->as<ASTExpressionList &>();
for (auto & expression_part : select_with_list_typed.children)
{
auto expression_node = getExpression(expression_part);
auto expression_node = buildExpression(expression_part);
current_query_tree->getWith().getNodes().push_back(expression_node);
}
}
@ -156,23 +212,23 @@ QueryTreeNodePtr QueryTreeBuilder::getSelectExpression(const ASTPtr & select_que
for (auto & expression_part : select_expression_list_typed.children)
{
auto expression_node = getExpression(expression_part);
auto expression_node = buildExpression(expression_part);
current_query_tree->getProjection().getNodes().push_back(expression_node);
}
}
auto prewhere_expression = select_query_typed.prewhere();
if (prewhere_expression)
current_query_tree->getPrewhere() = getExpression(prewhere_expression);
current_query_tree->getPrewhere() = buildExpression(prewhere_expression);
auto where_expression = select_query_typed.where();
if (where_expression)
current_query_tree->getWhere() = getExpression(where_expression);
current_query_tree->getWhere() = buildExpression(where_expression);
return current_query_tree;
}
QueryTreeNodePtr QueryTreeBuilder::getExpressionList(const ASTPtr & expression_list) const
QueryTreeNodePtr QueryTreeBuilder::buildExpressionList(const ASTPtr & expression_list) const
{
auto list_node = std::make_shared<ListNode>();
@ -181,14 +237,14 @@ QueryTreeNodePtr QueryTreeBuilder::getExpressionList(const ASTPtr & expression_l
for (auto & expression : expression_list_typed.children)
{
auto expression_node = getExpression(expression);
auto expression_node = buildExpression(expression);
list_node->getNodes().push_back(std::move(expression_node));
}
return list_node;
}
QueryTreeNodePtr QueryTreeBuilder::getExpression(const ASTPtr & expression) const
QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression) const
{
QueryTreeNodePtr result;
@ -200,7 +256,7 @@ QueryTreeNodePtr QueryTreeBuilder::getExpression(const ASTPtr & expression) cons
}
else if (const auto * asterisk = expression->as<ASTAsterisk>())
{
auto column_transformers = getColumnTransformers(expression, 0 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/);
result = std::make_shared<MatcherNode>(column_transformers);
}
else if (const auto * qualified_asterisk = expression->as<ASTQualifiedAsterisk>())
@ -210,7 +266,7 @@ QueryTreeNodePtr QueryTreeBuilder::getExpression(const ASTPtr & expression) cons
/// but ASTTableIdentifier can contain only 2 parts.
auto & qualified_identifier = qualified_asterisk->children.at(0)->as<ASTTableIdentifier &>();
auto column_transformers = getColumnTransformers(expression, 1 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/);
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), column_transformers);
}
else if (const auto * ast_literal = expression->as<ASTLiteral>())
@ -259,7 +315,7 @@ QueryTreeNodePtr QueryTreeBuilder::getExpression(const ASTPtr & expression) cons
}
const auto & lambda_expression = lambda_arguments_and_expression.at(1);
auto lambda_expression_node = getExpression(lambda_expression);
auto lambda_expression_node = buildExpression(lambda_expression);
result = std::make_shared<LambdaNode>(std::move(lambda_arguments), std::move(lambda_expression_node));
}
@ -271,14 +327,14 @@ QueryTreeNodePtr QueryTreeBuilder::getExpression(const ASTPtr & expression) cons
{
const auto & function_parameters_list = function->parameters->as<ASTExpressionList>()->children;
for (const auto & argument : function_parameters_list)
function_node->getParameters().getNodes().push_back(getExpression(argument));
function_node->getParameters().getNodes().push_back(buildExpression(argument));
}
if (function->arguments)
{
const auto & function_arguments_list = function->arguments->as<ASTExpressionList>()->children;
for (const auto & argument : function_arguments_list)
function_node->getArguments().getNodes().push_back(getExpression(argument));
function_node->getArguments().getNodes().push_back(buildExpression(argument));
}
result = function_node;
@ -287,20 +343,20 @@ QueryTreeNodePtr QueryTreeBuilder::getExpression(const ASTPtr & expression) cons
else if (const auto * subquery = expression->as<ASTSubquery>())
{
auto subquery_query = subquery->children[0];
auto query_node = getSelectWithUnionExpression(subquery_query, true /*is_subquery*/, {} /*cte_name*/);
auto query_node = buildSelectWithUnionExpression(subquery_query, true /*is_subquery*/, {} /*cte_name*/);
result = query_node;
}
else if (const auto * with_element = expression->as<ASTWithElement>())
{
auto with_element_subquery = with_element->subquery->as<ASTSubquery &>().children.at(0);
auto query_node = getSelectWithUnionExpression(with_element_subquery, true /*is_subquery*/, with_element->name /*cte_name*/);
auto query_node = buildSelectWithUnionExpression(with_element_subquery, true /*is_subquery*/, with_element->name /*cte_name*/);
result = query_node;
}
else if (const auto * columns_regexp_matcher = expression->as<ASTColumnsRegexpMatcher>())
{
auto column_transformers = getColumnTransformers(expression, 0 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/);
result = std::make_shared<MatcherNode>(columns_regexp_matcher->getMatcher(), std::move(column_transformers));
}
else if (const auto * columns_list_matcher = expression->as<ASTColumnsListMatcher>())
@ -314,13 +370,13 @@ QueryTreeNodePtr QueryTreeBuilder::getExpression(const ASTPtr & expression) cons
column_list_identifiers.emplace_back(Identifier{column_list_identifier.name_parts});
}
auto column_transformers = getColumnTransformers(expression, 0 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/);
result = std::make_shared<MatcherNode>(std::move(column_list_identifiers), std::move(column_transformers));
}
else if (const auto * qualified_columns_regexp_matcher = expression->as<ASTQualifiedColumnsRegexpMatcher>())
{
auto & qualified_identifier = qualified_columns_regexp_matcher->children.at(0)->as<ASTTableIdentifier &>();
auto column_transformers = getColumnTransformers(expression, 1 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/);
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), qualified_columns_regexp_matcher->getMatcher(), std::move(column_transformers));
}
else if (const auto * qualified_columns_list_matcher = expression->as<ASTQualifiedColumnsListMatcher>())
@ -336,7 +392,7 @@ QueryTreeNodePtr QueryTreeBuilder::getExpression(const ASTPtr & expression) cons
column_list_identifiers.emplace_back(Identifier{column_list_identifier.name_parts});
}
auto column_transformers = getColumnTransformers(expression, 1 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/);
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), column_list_identifiers, std::move(column_transformers));
}
else
@ -350,7 +406,7 @@ QueryTreeNodePtr QueryTreeBuilder::getExpression(const ASTPtr & expression) cons
return result;
}
QueryTreeNodePtr QueryTreeBuilder::getFromNode(const ASTPtr & tables_in_select_query) const
QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select_query) const
{
if (!tables_in_select_query)
{
@ -389,7 +445,7 @@ QueryTreeNodePtr QueryTreeBuilder::getFromNode(const ASTPtr & tables_in_select_q
auto & subquery_expression = table_expression.subquery->as<ASTSubquery &>();
const auto & select_with_union_query = subquery_expression.children[0];
auto node = getSelectWithUnionExpression(select_with_union_query, true /*is_subquery*/, {} /*cte_name*/);
auto node = buildSelectWithUnionExpression(select_with_union_query, true /*is_subquery*/, {} /*cte_name*/);
node->setAlias(subquery_expression.tryGetAlias());
node->setOriginalAST(select_with_union_query);
@ -406,7 +462,7 @@ QueryTreeNodePtr QueryTreeBuilder::getFromNode(const ASTPtr & tables_in_select_q
{
const auto & function_arguments_list = table_function_expression.arguments->as<ASTExpressionList>()->children;
for (const auto & argument : function_arguments_list)
node->getArguments().getNodes().push_back(getExpression(argument));
node->getArguments().getNodes().push_back(buildExpression(argument));
}
node->setAlias(table_function_expression.tryGetAlias());
@ -433,9 +489,9 @@ QueryTreeNodePtr QueryTreeBuilder::getFromNode(const ASTPtr & tables_in_select_q
QueryTreeNodePtr join_expression;
if (table_join.using_expression_list)
join_expression = getExpressionList(table_join.using_expression_list);
join_expression = buildExpressionList(table_join.using_expression_list);
else if (table_join.on_expression)
join_expression = getExpression(table_join.on_expression);
join_expression = buildExpression(table_join.on_expression);
auto join_node = std::make_shared<JoinNode>(std::move(left_table_expression),
std::move(right_table_expression),
@ -458,7 +514,7 @@ QueryTreeNodePtr QueryTreeBuilder::getFromNode(const ASTPtr & tables_in_select_q
auto last_table_expression = std::move(table_expressions.back());
table_expressions.pop_back();
auto array_join_expressions_list = getExpressionList(array_join_expression.expression_list);
auto array_join_expressions_list = buildExpressionList(array_join_expression.expression_list);
auto array_join_node = std::make_shared<ArrayJoinNode>(std::move(last_table_expression), std::move(array_join_expressions_list), is_left_array_join);
/** Original AST is not set because it will contain only array join part and does
@ -478,7 +534,7 @@ QueryTreeNodePtr QueryTreeBuilder::getFromNode(const ASTPtr & tables_in_select_q
}
ColumnTransformersNodes QueryTreeBuilder::getColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index) const
ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index) const
{
ColumnTransformersNodes column_transformers;
size_t children_size = matcher_expression->children.size();
@ -491,14 +547,14 @@ ColumnTransformersNodes QueryTreeBuilder::getColumnTransformers(const ASTPtr & m
{
if (apply_transformer->lambda)
{
auto lambda_query_tree_node = getExpression(apply_transformer->lambda);
auto lambda_query_tree_node = buildExpression(apply_transformer->lambda);
column_transformers.emplace_back(std::make_shared<ApplyColumnTransformerNode>(std::move(lambda_query_tree_node)));
}
else
{
auto function_node = std::make_shared<FunctionNode>(apply_transformer->func_name);
if (apply_transformer->parameters)
function_node->getParametersNode() = getExpressionList(apply_transformer->parameters);
function_node->getParametersNode() = buildExpressionList(apply_transformer->parameters);
column_transformers.emplace_back(std::make_shared<ApplyColumnTransformerNode>(std::move(function_node)));
}
@ -529,7 +585,7 @@ ColumnTransformersNodes QueryTreeBuilder::getColumnTransformers(const ASTPtr & m
for (const auto & replace_transformer_child : replace_transformer->children)
{
auto & replacement = replace_transformer_child->as<ASTColumnsReplaceTransformer::Replacement &>();
replacements.emplace_back(ReplaceColumnTransformerNode::Replacement{replacement.name, getExpression(replacement.expr)});
replacements.emplace_back(ReplaceColumnTransformerNode::Replacement{replacement.name, buildExpression(replacement.expr)});
}
column_transformers.emplace_back(std::make_shared<ReplaceColumnTransformerNode>(replacements, replace_transformer->is_strict));
@ -543,6 +599,8 @@ ColumnTransformersNodes QueryTreeBuilder::getColumnTransformers(const ASTPtr & m
return column_transformers;
}
}
QueryTreeNodePtr buildQueryTree(ASTPtr query, ContextPtr context)
{
QueryTreeBuilder builder(query, context);

193
src/Analyzer/UnionNode.cpp Normal file
View File

@ -0,0 +1,193 @@
#include <Analyzer/UnionNode.h>
#include <Common/SipHash.h>
#include <Core/NamesAndTypes.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Core/ColumnWithTypeAndName.h>
#include <DataTypes/getLeastSupertype.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
}
/// Creates an empty union node: allocates the child slots and installs an empty list node
/// that will hold the united queries.
UnionNode::UnionNode()
{
    children.resize(children_size);

    auto queries_list = std::make_shared<ListNode>();
    children[queries_child_index] = std::move(queries_list);
}
/** Compute projection columns of the whole union.
  *
  * Projection columns are collected from every united query (nested unions are processed recursively).
  * All queries must produce the same number of columns, otherwise TYPE_MISMATCH exception is thrown.
  * Result column names are taken from the first query, result column types are the least supertypes
  * of the types found at the same position in every query.
  */
NamesAndTypesList UnionNode::computeProjectionColumns() const
{
    const auto & union_queries = getQueries().getNodes();

    std::vector<NamesAndTypes> per_query_columns;

    /// Declared outside of the loop: if a child is neither a query nor a union, the value from the previous iteration is reused.
    NamesAndTypes current_columns;

    for (const auto & union_query : union_queries)
    {
        if (auto * as_query_node = union_query->as<QueryNode>())
        {
            auto columns_list = as_query_node->computeProjectionColumns();
            current_columns.assign(columns_list.begin(), columns_list.end());
        }
        else if (auto * as_union_node = union_query->as<UnionNode>())
        {
            auto columns_list = as_union_node->computeProjectionColumns();
            current_columns.assign(columns_list.begin(), columns_list.end());
        }

        per_query_columns.push_back(current_columns);

        /// Every query is compared against the first one
        if (per_query_columns.front().size() != current_columns.size())
            throw Exception(ErrorCodes::TYPE_MISMATCH, "UNION different number of columns in queries");
    }

    const size_t queries_count = per_query_columns.size();
    const size_t columns_count = current_columns.size();

    NamesAndTypesList union_columns;

    /// Reused for every column position: types of that column in each of the united queries
    DataTypes types_at_position(queries_count);

    for (size_t column_position = 0; column_position < columns_count; ++column_position)
    {
        for (size_t query_position = 0; query_position < queries_count; ++query_position)
            types_at_position[query_position] = per_query_columns[query_position][column_position].type;

        union_columns.emplace_back(per_query_columns.front()[column_position].name, getLeastSupertype(types_at_position));
    }

    return union_columns;
}
/** Build the name of the union from the names of the united queries and the union modes.
  *
  * For every query its name is written. For every query except the first one the mode that joins it
  * with the previous query is written right after the name: ALL and DISTINCT are written as
  * " UNION <mode>", other modes are written as is.
  *
  * Throws std::out_of_range (from union_modes.at) if there are fewer modes than queries minus one.
  */
String UnionNode::getName() const
{
    WriteBufferFromOwnString buffer;

    /// Bind by reference: copying the list would copy every child shared pointer on each call for nothing.
    const auto & query_nodes = getQueries().getNodes();
    size_t query_nodes_size = query_nodes.size();

    for (size_t i = 0; i < query_nodes_size; ++i)
    {
        const auto & query_node = query_nodes[i];
        buffer << query_node->getName();

        if (i == 0)
            continue;

        /// NOTE(review): the mode is emitted after the right operand, not between the operands
        /// ("q0 q1 UNION ALL"). Kept as is to not change generated names - confirm this is intended.
        auto query_union_mode = union_modes.at(i - 1);

        if (query_union_mode == SelectUnionMode::ALL || query_union_mode == SelectUnionMode::DISTINCT)
            buffer << " UNION " << toString(query_union_mode);
        else
            buffer << toString(query_union_mode);
    }

    return buffer.str();
}
/** Write a human readable dump of the union node into buffer.
  *
  * The dump contains the node id, alias (if any), subquery and CTE flags, CTE name (if not empty),
  * common union mode, list of per query union modes and finally the dump of the queries list.
  * Nested sections are shifted by 2 and 4 spaces relative to indent.
  */
void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
{
    /// ALL and DISTINCT are written with " UNION " prefix, other modes are written as is
    const auto write_union_mode = [&buffer](SelectUnionMode mode)
    {
        const bool is_plain_union = mode == SelectUnionMode::ALL || mode == SelectUnionMode::DISTINCT;
        if (is_plain_union)
            buffer << " UNION ";

        buffer << toString(mode);
    };

    const std::string node_padding(indent, ' ');
    const std::string section_padding(indent + 2, ' ');
    const std::string item_padding(indent + 4, ' ');

    buffer << node_padding << "UNION id: " << format_state.getNodeId(this);

    if (hasAlias())
        buffer << ", alias: " << getAlias();

    buffer << ", is_subquery: " << is_subquery;
    buffer << ", is_cte: " << is_cte;

    if (!cte_name.empty())
        buffer << ", cte_name: " << cte_name;

    buffer << ", union_mode: ";
    write_union_mode(union_mode);

    const size_t modes_count = union_modes.size();
    buffer << '\n' << section_padding << "UNION MODES " << modes_count << '\n';

    for (size_t mode_index = 0; mode_index < modes_count; ++mode_index)
    {
        /// Modes are separated by a new line, there is no separator after the last one
        if (mode_index != 0)
            buffer << '\n';

        buffer << item_padding;
        write_union_mode(union_modes[mode_index]);
    }

    buffer << '\n' << section_padding << "QUERIES\n";
    getQueriesNode()->dumpTreeImpl(buffer, format_state, indent + 4);
}
bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs) const
{
const auto & rhs_typed = assert_cast<const UnionNode &>(rhs);
return is_subquery == rhs_typed.is_subquery && is_cte == rhs_typed.is_cte && cte_name == rhs_typed.cte_name;
}
void UnionNode::updateTreeHashImpl(HashState & state) const
{
state.update(is_subquery);
state.update(is_cte);
state.update(cte_name.size());
state.update(cte_name);
}
/** Convert the union node into ASTSelectWithUnionQuery.
  *
  * Union modes are copied into the AST. The queries list is converted into AST once, the result
  * becomes the only child of the AST node and list_of_selects refers to the same AST.
  */
ASTPtr UnionNode::toASTImpl() const
{
    auto union_ast = std::make_shared<ASTSelectWithUnionQuery>();

    union_ast->union_mode = union_mode;
    union_ast->list_of_modes = union_modes;
    union_ast->set_of_modes = union_modes_set;

    auto selects_ast = getQueriesNode()->toAST();
    union_ast->list_of_selects = selects_ast;
    union_ast->children.push_back(std::move(selects_ast));

    return union_ast;
}
/** Create a new union node with the same own state: union modes, subquery and CTE properties.
  *
  * Children are not copied here, the new node has the empty queries list created by the constructor.
  * NOTE(review): presumably the caller is responsible for cloning children - confirm against IQueryTreeNode::clone.
  */
QueryTreeNodePtr UnionNode::cloneImpl() const
{
    auto cloned_union = std::make_shared<UnionNode>();

    cloned_union->union_mode = union_mode;
    cloned_union->union_modes = union_modes;
    cloned_union->union_modes_set = union_modes_set;

    cloned_union->is_subquery = is_subquery;
    cloned_union->is_cte = is_cte;
    cloned_union->cte_name = cte_name;

    return cloned_union;
}
}

126
src/Analyzer/UnionNode.h Normal file
View File

@ -0,0 +1,126 @@
#pragma once
#include <Analyzer/Identifier.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ListNode.h>
#include <Parsers/SelectUnionMode.h>
namespace DB
{
/** Union node represents union of queries in query tree.
  *
  * United queries are stored in a list node that is the only child of the union node.
  * Besides the queries the node keeps the common union mode, the list of union modes
  * and the set built from that list, and subquery and CTE properties.
  */
class UnionNode;
using UnionNodePtr = std::shared_ptr<UnionNode>;

class UnionNode final : public IQueryTreeNode
{
public:
    /// Create union node with empty queries list
    explicit UnionNode();

    /// Returns true if union node is subquery, false otherwise
    bool isSubquery() const
    {
        return is_subquery;
    }

    /// Set union node is subquery value
    void setIsSubquery(bool is_subquery_value)
    {
        is_subquery = is_subquery_value;
    }

    /// Returns true if union node is CTE, false otherwise
    bool isCTE() const
    {
        return is_cte;
    }

    /// Set union node is CTE value
    void setIsCTE(bool is_cte_value)
    {
        is_cte = is_cte_value;
    }

    /// Get union node CTE name
    const std::string & getCTEName() const
    {
        return cte_name;
    }

    /// Set union node CTE name
    void setCTEName(std::string cte_name_value)
    {
        cte_name = std::move(cte_name_value);
    }

    /// Get common union mode, SelectUnionMode::Unspecified if it was not set
    SelectUnionMode getUnionMode() const
    {
        return union_mode;
    }

    /// Set common union mode
    void setUnionMode(SelectUnionMode union_mode_value)
    {
        union_mode = union_mode_value;
    }

    /// Get union modes list
    const SelectUnionModes & getUnionModes() const
    {
        return union_modes;
    }

    /// Set union modes list, union modes set is rebuilt from the new list
    void setUnionModes(const SelectUnionModes & union_modes_value)
    {
        union_modes = union_modes_value;
        union_modes_set = SelectUnionModesSet(union_modes.begin(), union_modes.end());
    }

    /// Get queries list node
    const QueryTreeNodePtr & getQueriesNode() const
    {
        return children[queries_child_index];
    }

    /// Get queries list node
    QueryTreeNodePtr & getQueriesNode()
    {
        return children[queries_child_index];
    }

    /// Get queries list
    const ListNode & getQueries() const
    {
        return children[queries_child_index]->as<const ListNode &>();
    }

    /// Get queries list
    ListNode & getQueries()
    {
        return children[queries_child_index]->as<ListNode &>();
    }

    /// Compute union projection
    NamesAndTypesList computeProjectionColumns() const;

    QueryTreeNodeType getNodeType() const override
    {
        return QueryTreeNodeType::UNION;
    }

    String getName() const override;

    void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;

    bool isEqualImpl(const IQueryTreeNode & rhs) const override;

    void updateTreeHashImpl(HashState &) const override;

protected:
    ASTPtr toASTImpl() const override;

    QueryTreeNodePtr cloneImpl() const override;

private:
    bool is_subquery = false;
    bool is_cte = false;
    std::string cte_name;

    /// Must have an initializer: the mode is read (dump, clone, conversion to AST, planner check
    /// for Unspecified) even if setUnionMode was never called, reading an uninitialized value is undefined behavior.
    SelectUnionMode union_mode = SelectUnionMode::Unspecified;
    SelectUnionModes union_modes;
    SelectUnionModesSet union_modes_set;

    static constexpr size_t queries_child_index = 0;
    static constexpr size_t children_size = queries_child_index + 1;
};
}

View File

@ -43,12 +43,6 @@ bool isNameOfInFunction(const std::string & function_name)
return is_special_function_in;
}
bool isTableExpression(const IQueryTreeNode * node)
{
auto node_type = node->getNodeType();
return node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::TABLE_FUNCTION || node_type == QueryTreeNodeType::QUERY;
}
static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expression_node)
{
ASTPtr table_expression_node_ast;

View File

@ -11,9 +11,6 @@ bool isNodePartOfTree(const IQueryTreeNode * node, const IQueryTreeNode * root);
/// Returns true if function name is name of IN function or its variations, false otherwise
bool isNameOfInFunction(const std::string & function_name);
/// Returns true if node hase type table, table function, or query, false otherwise
bool isTableExpression(const IQueryTreeNode * node);
/** Add table expression in tables in select query children.
* If table expression node is not of identifier node, table node, query node, table function node, join node or array join node type throws logical error exception.
*/

View File

@ -11,6 +11,8 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/FieldToDataType.h>
#include <Columns/getLeastSuperColumn.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/IStorage.h>
@ -24,6 +26,7 @@
#include <Analyzer/QueryNode.h>
#include <Analyzer/JoinNode.h>
#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/UnionNode.h>
#include <Analyzer/QueryTreeBuilder.h>
#include <Analyzer/QueryTreePassManager.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
@ -43,6 +46,9 @@
#include <Processors/QueryPlan/JoinStep.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ArrayJoinStep.h>
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/DistinctStep.h>
#include <Processors/QueryPlan/IntersectOrExceptStep.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Interpreters/Context.h>
@ -81,6 +87,8 @@ namespace ErrorCodes
* TODO: Support projections
* TODO: Support read in order optimization
* TODO: Simplify actions chain
* TODO: UNION storage limits
* TODO: Interpreter resources
*/
namespace
@ -101,6 +109,35 @@ namespace
return query_pipeline_buffer.str();
}
/** Build the common header for queries united with UNION.
  *
  * All headers must have the same number of columns, otherwise TYPE_MISMATCH exception is thrown.
  * The result is a copy of the first header where every column is replaced with the result of
  * getLeastSuperColumn for the columns at the same position in every header.
  * Throws LOGICAL_ERROR exception if headers are empty.
  */
Block getCommonHeaderForUnion(const Blocks & headers)
{
    /// headers.front() below is undefined behavior for empty headers
    if (headers.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot compute common header for UNION without queries");

    size_t num_selects = headers.size();
    Block common_header = headers.front();
    size_t num_columns = common_header.columns();

    for (size_t query_num = 1; query_num < num_selects; ++query_num)
    {
        if (headers[query_num].columns() != num_columns)
            throw Exception(ErrorCodes::TYPE_MISMATCH,
                "Different number of columns in UNION elements: {} and {}",
                common_header.dumpNames(),
                headers[query_num].dumpNames());
    }

    /// Reused for every column position: columns at that position in each header
    std::vector<const ColumnWithTypeAndName *> columns(num_selects);

    for (size_t column_num = 0; column_num < num_columns; ++column_num)
    {
        for (size_t i = 0; i < num_selects; ++i)
            columns[i] = &headers[i].getByPosition(column_num);

        ColumnWithTypeAndName & result_elem = common_header.getByPosition(column_num);
        result_elem = getLeastSuperColumn(columns);
    }

    return common_header;
}
class ActionsChainNode;
using ActionsChainNodePtr = std::unique_ptr<ActionsChainNode>;
using ActionsChainNodes = std::vector<ActionsChainNodePtr>;
@ -894,7 +931,7 @@ private:
const auto & function_node = node->as<FunctionNode &>();
WriteBufferFromOwnString buffer;
buffer << function_node.getFunctionName();
buffer << "__function_" + function_node.getFunctionName();
const auto & function_parameters_nodes = function_node.getParameters().getNodes();
@ -973,6 +1010,8 @@ public:
{
case QueryTreeNodeType::QUERY:
[[fallthrough]];
case QueryTreeNodeType::UNION:
[[fallthrough]];
case QueryTreeNodeType::TABLE:
[[fallthrough]];
case QueryTreeNodeType::TABLE_FUNCTION:
@ -1055,9 +1094,10 @@ public:
if (column_source_node_type != QueryTreeNodeType::TABLE &&
column_source_node_type != QueryTreeNodeType::TABLE_FUNCTION &&
column_source_node_type != QueryTreeNodeType::QUERY)
column_source_node_type != QueryTreeNodeType::QUERY &&
column_source_node_type != QueryTreeNodeType::UNION)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected table, table function or query column source. Actual {}",
"Expected table, table function, query or union column source. Actual {}",
column_source_node->formatASTForErrorMessage());
auto [source_columns_set_it, inserted] = it->second.source_columns_names.insert(column_node->getColumnName());
@ -1115,6 +1155,7 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression,
auto * table_node = table_expression->as<TableNode>();
auto * table_function_node = table_expression->as<TableFunctionNode>();
auto * query_node = table_expression->as<QueryNode>();
auto * union_node = table_expression->as<UnionNode>();
QueryPlan query_plan;
@ -1168,7 +1209,7 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression,
query_plan.addStep(std::move(read_from_pipe));
}
}
else if (query_node)
else if (query_node || union_node)
{
InterpreterSelectQueryAnalyzer interpeter(table_expression, select_query_options, planner_context.query_context);
interpeter.initializeQueryPlanIfNeeded();
@ -1176,7 +1217,7 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression,
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function or query. Actual {}", table_expression->formatASTForErrorMessage());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function, query or union. Actual {}", table_expression->formatASTForErrorMessage());
}
auto rename_actions_dag = std::make_shared<ActionsDAG>(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
@ -1975,6 +2016,8 @@ QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node,
{
case QueryTreeNodeType::QUERY:
[[fallthrough]];
case QueryTreeNodeType::UNION:
[[fallthrough]];
case QueryTreeNodeType::TABLE:
[[fallthrough]];
case QueryTreeNodeType::TABLE_FUNCTION:
@ -2009,26 +2052,13 @@ InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer(
, query(query_)
, select_query_options(select_query_options_)
{
if (auto * select_with_union_query_typed = query->as<ASTSelectWithUnionQuery>())
if (query->as<ASTSelectWithUnionQuery>() || query->as<ASTSelectQuery>())
{
auto & select_lists = select_with_union_query_typed->list_of_selects->as<ASTExpressionList &>();
if (select_lists.children.size() == 1)
{
query = select_lists.children[0];
}
else
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "UNION is not supported");
}
}
else if (auto * subquery = query->as<ASTSubquery>())
{
query = subquery->children[0];
}
else if (auto * select_query_typed = query_->as<ASTSelectQuery>())
{
}
else
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
@ -2052,10 +2082,11 @@ InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer(
, query_tree(query_tree_)
, select_query_options(select_query_options_)
{
if (query_tree_->getNodeType() != QueryTreeNodeType::QUERY)
if (query_tree->getNodeType() != QueryTreeNodeType::QUERY &&
query_tree->getNodeType() != QueryTreeNodeType::UNION)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Expected query node. Actual {}",
query_tree_->formatASTForErrorMessage());
"Expected QUERY or UNION node. Actual {}",
query_tree->formatASTForErrorMessage());
}
@ -2084,10 +2115,85 @@ void InterpreterSelectQueryAnalyzer::initializeQueryPlanIfNeeded()
if (query_plan.isInitialized())
return;
auto & query_node = query_tree->as<QueryNode &>();
auto current_context = getContext();
/** Query plan for UNION node.
  * A separate query plan is built for every united query. Plans whose header differs from the common
  * header get a converting step. Then all plans are united with UnionStep (followed by DistinctStep
  * for UNION DISTINCT) or with IntersectOrExceptStep.
  */
if (auto * union_query_tree = query_tree->as<UnionNode>())
{
    auto union_mode = union_query_tree->getUnionMode();
    if (union_mode == SelectUnionMode::Unspecified)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION mode must be initialized");

    std::vector<std::unique_ptr<QueryPlan>> query_plans;
    Blocks query_plans_headers;

    for (auto & query_node : union_query_tree->getQueries().getNodes())
    {
        InterpreterSelectQueryAnalyzer interpeter(query_node, select_query_options, current_context);
        interpeter.initializeQueryPlanIfNeeded();
        auto query_node_plan = std::make_unique<QueryPlan>(std::move(interpeter).extractQueryPlan());
        query_plans_headers.push_back(query_node_plan->getCurrentDataStream().header);
        query_plans.push_back(std::move(query_node_plan));
    }

    Block union_common_header = getCommonHeaderForUnion(query_plans_headers);
    DataStreams query_plans_streams;
    query_plans_streams.reserve(query_plans.size());

    for (auto & query_node_plan : query_plans)
    {
        /// Conversion is necessary only if plan header differs from the common header
        if (!blocksHaveEqualStructure(query_node_plan->getCurrentDataStream().header, union_common_header))
        {
            auto actions_dag = ActionsDAG::makeConvertingActions(
                query_node_plan->getCurrentDataStream().header.getColumnsWithTypeAndName(),
                union_common_header.getColumnsWithTypeAndName(),
                ActionsDAG::MatchColumnsMode::Position);
            auto converting_step = std::make_unique<ExpressionStep>(query_node_plan->getCurrentDataStream(), std::move(actions_dag));
            converting_step->setStepDescription("Conversion before UNION");
            query_node_plan->addStep(std::move(converting_step));
        }

        /** Stream is registered for every plan, including plans that do not need conversion.
          * Streams and plans are passed to the uniting step side by side, skipping a stream
          * would leave them out of sync (or leave the step without input streams at all).
          */
        query_plans_streams.push_back(query_node_plan->getCurrentDataStream());
    }

    const auto & settings = current_context->getSettingsRef();
    auto max_threads = settings.max_threads;

    if (union_mode == SelectUnionMode::ALL || union_mode == SelectUnionMode::DISTINCT)
    {
        auto union_step = std::make_unique<UnionStep>(std::move(query_plans_streams), max_threads);
        query_plan.unitePlans(std::move(union_step), std::move(query_plans));

        if (union_mode == SelectUnionMode::DISTINCT)
        {
            /// Add distinct transform
            SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);

            auto distinct_step = std::make_unique<DistinctStep>(
                query_plan.getCurrentDataStream(),
                limits,
                0 /*limit hint*/,
                query_plan.getCurrentDataStream().header.getNames(),
                false /*pre distinct*/,
                settings.optimize_distinct_in_order);

            query_plan.addStep(std::move(distinct_step));
        }
    }
    else if (union_mode == SelectUnionMode::INTERSECT || union_mode == SelectUnionMode::EXCEPT)
    {
        IntersectOrExceptStep::Operator intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT;
        if (union_mode == SelectUnionMode::EXCEPT)
            intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT;

        auto union_step = std::make_unique<IntersectOrExceptStep>(std::move(query_plans_streams), intersect_or_except_operator, max_threads);
        query_plan.unitePlans(std::move(union_step), std::move(query_plans));
    }

    return;
}
auto & query_node = query_tree->as<QueryNode &>();
SelectQueryInfo select_query_info;
select_query_info.original_query = query;
select_query_info.query = query;
@ -2096,13 +2202,13 @@ void InterpreterSelectQueryAnalyzer::initializeQueryPlanIfNeeded()
planner_context.query_context = getContext();
CollectTableExpressionIdentifiersVisitor collect_table_expression_identifiers_visitor;
collect_table_expression_identifiers_visitor.visit(query_node.getFrom(), planner_context);
collect_table_expression_identifiers_visitor.visit(query_node.getJoinTree(), planner_context);
CollectSourceColumnsVisitor::Data data {planner_context};
CollectSourceColumnsVisitor collect_source_columns_visitor(data);
collect_source_columns_visitor.visit(query_tree);
query_plan = buildQueryPlanForJoinTreeNode(query_node.getFrom(), select_query_info, select_query_options, planner_context);
query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, planner_context);
std::optional<std::vector<size_t>> action_chain_node_parent_indices;
if (query_node.hasWhere())
@ -2167,6 +2273,14 @@ void InterpreterSelectQueryAnalyzer::initializeQueryPlanIfNeeded()
planner_context.projection_actions->project(projection_names);
// std::cout << "Chain dump before finalize" << std::endl;
// std::cout << planner_context.actions_chain.dump() << std::endl;
planner_context.actions_chain.finalize();
// std::cout << "Chain dump after finalize" << std::endl;
// std::cout << planner_context.actions_chain.dump() << std::endl;
if (query_node.hasWhere())
{
auto & where_actions_chain_node = planner_context.actions_chain.at(planner_context.where_actions_chain_node_index);
@ -2185,14 +2299,6 @@ void InterpreterSelectQueryAnalyzer::initializeQueryPlanIfNeeded()
auto projection_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), planner_context.projection_actions);
projection_step->setStepDescription("Projection");
query_plan.addStep(std::move(projection_step));
// std::cout << "Chain dump before finalize" << std::endl;
// std::cout << planner_context.actions_chain.dump() << std::endl;
planner_context.actions_chain.finalize();
// std::cout << "Chain dump after finalize" << std::endl;
// std::cout << planner_context.actions_chain.dump() << std::endl;
}
}

View File

@ -87,7 +87,7 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
explain_query->setTableFunction(table_function);
explain_query->setTableOverride(table_override);
}
if (kind == ASTExplainQuery::ExplainKind::QueryTree)
else if (kind == ASTExplainQuery::ExplainKind::QueryTree)
{
if (select_p.parse(pos, query, expected))
explain_query->setExplainedQuery(std::move(query));

View File

@ -8,9 +8,9 @@ namespace DB
class IntersectOrExceptStep : public IQueryPlanStep
{
using Operator = ASTSelectIntersectExceptQuery::Operator;
public:
using Operator = ASTSelectIntersectExceptQuery::Operator;
/// max_threads is used to limit the number of threads for result pipeline.
IntersectOrExceptStep(DataStreams input_streams_, Operator operator_, size_t max_threads_ = 0);

View File

@ -0,0 +1,62 @@
-- { echoOn }
SELECT 'Union constants';
Union constants
SELECT 1 UNION ALL SELECT 1;
1
1
SELECT '--';
--
SELECT 1 UNION DISTINCT SELECT 1 UNION ALL SELECT 1;
1
1
SELECT '--';
--
SELECT 1 INTERSECT SELECT 1;
1
SELECT '--';
--
SELECT 1 EXCEPT SELECT 1;
SELECT '--';
--
SELECT id FROM (SELECT 1 AS id UNION ALL SELECT 1);
1
1
SELECT 'Union non constants';
Union non constants
SELECT value FROM (SELECT 1 as value UNION ALL SELECT 1 UNION ALL SELECT 1);
1
1
1
SELECT '--';
--
SELECT id FROM test_table UNION ALL SELECT id FROM test_table;
0
0
SELECT '--';
--
SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table;
0
SELECT '--';
--
SELECT id FROM test_table INTERSECT SELECT id FROM test_table;
0
SELECT '--';
--
SELECT id FROM test_table EXCEPT SELECT id FROM test_table;
SELECT '--';
--
SELECT id FROM (SELECT id FROM test_table UNION ALL SELECT id FROM test_table);
0
0
SELECT '--';
--
SELECT id FROM (SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table);
0
SELECT '--';
--
SELECT id FROM (SELECT id FROM test_table INTERSECT SELECT id FROM test_table);
0
SELECT '--';
--
SELECT id FROM (SELECT id FROM test_table EXCEPT SELECT id FROM test_table);

View File

@ -0,0 +1,71 @@
-- Test setup for UNION / INTERSECT / EXCEPT queries over constants and over a table.
-- NOTE(review): use_analyzer is set to 0 here, which presumably disables the new analyzer,
-- so the queries below may not exercise the UNION support this test is meant to cover - confirm.
SET use_analyzer = 0;
-- Recreate the table so that the test does not depend on state left by previous runs.
DROP TABLE IF EXISTS test_table;
CREATE TABLE test_table
(
id UInt64,
value String
) ENGINE=TinyLog;
-- A single row, so every query over test_table returns at most one row per united branch.
INSERT INTO test_table VALUES (0, 'Value');
-- { echoOn }
SELECT 'Union constants';
SELECT 1 UNION ALL SELECT 1;
SELECT '--';
SELECT 1 UNION DISTINCT SELECT 1 UNION ALL SELECT 1;
SELECT '--';
SELECT 1 INTERSECT SELECT 1;
SELECT '--';
SELECT 1 EXCEPT SELECT 1;
SELECT '--';
SELECT id FROM (SELECT 1 AS id UNION ALL SELECT 1);
SELECT 'Union non constants';
SELECT value FROM (SELECT 1 as value UNION ALL SELECT 1 UNION ALL SELECT 1);
SELECT '--';
SELECT id FROM test_table UNION ALL SELECT id FROM test_table;
SELECT '--';
SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table;
SELECT '--';
SELECT id FROM test_table INTERSECT SELECT id FROM test_table;
SELECT '--';
SELECT id FROM test_table EXCEPT SELECT id FROM test_table;
SELECT '--';
SELECT id FROM (SELECT id FROM test_table UNION ALL SELECT id FROM test_table);
SELECT '--';
SELECT id FROM (SELECT id FROM test_table UNION DISTINCT SELECT id FROM test_table);
SELECT '--';
SELECT id FROM (SELECT id FROM test_table INTERSECT SELECT id FROM test_table);
SELECT '--';
SELECT id FROM (SELECT id FROM test_table EXCEPT SELECT id FROM test_table);
-- { echoOff }
DROP TABLE test_table;