ClickHouse/src/Analyzer/UnionNode.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

200 lines
6.5 KiB
C++
Raw Normal View History

2022-08-23 09:50:02 +00:00
#include <Analyzer/UnionNode.h>
#include <Common/SipHash.h>
2022-08-31 15:21:17 +00:00
#include <Common/FieldVisitorToString.h>
2022-08-23 09:50:02 +00:00
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Core/ColumnWithTypeAndName.h>
2022-12-06 09:44:38 +00:00
#include <Core/NamesAndTypes.h>
2022-08-23 09:50:02 +00:00
#include <DataTypes/getLeastSupertype.h>
2022-12-06 09:44:38 +00:00
#include <Interpreters/Context.h>
2022-08-23 09:50:02 +00:00
#include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
namespace DB
{
/// Error codes referenced in this file; only declared here, the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int TYPE_MISMATCH;
}
2022-12-06 09:44:38 +00:00
/** Creates a UNION node that keeps `context_` and combines its queries using `union_mode_`.
  *
  * Throws BAD_ARGUMENTS if `union_mode_` is one of the *_DEFAULT modes: those are rejected here
  * with the message that the mode must be normalized.
  */
UnionNode::UnionNode(ContextMutablePtr context_, SelectUnionMode union_mode_)
    : IQueryTreeNode(children_size)
    , context(std::move(context_))
    , union_mode(union_mode_)
{
    const bool is_default_mode = union_mode == SelectUnionMode::UNION_DEFAULT
        || union_mode == SelectUnionMode::EXCEPT_DEFAULT
        || union_mode == SelectUnionMode::INTERSECT_DEFAULT;

    if (is_default_mode)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION mode {} must be normalized", toString(union_mode));

    /// The queries child slot is filled with a freshly constructed list node.
    children[queries_child_index] = std::make_shared<ListNode>();
}
2022-08-31 15:21:17 +00:00
/** Computes the projection columns (names and types) of the whole UNION.
  *
  * The projection of every child is collected first: QueryNode children provide it through
  * getProjectionColumns(), nested UnionNode children through a recursive call to this method.
  * All children must expose the same number of columns, otherwise TYPE_MISMATCH is thrown.
  *
  * For each column position the result takes the name from the first child and the least
  * supertype of the types found at that position across all children.
  * Returns an empty list if there are no children.
  */
NamesAndTypes UnionNode::computeProjectionColumns() const
{
    const auto & query_nodes = getQueries().getNodes();

    std::vector<NamesAndTypes> projections;
    projections.reserve(query_nodes.size());

    for (const auto & query_node : query_nodes)
    {
        /// Declared per iteration on purpose: a child of an unexpected kind must contribute an empty
        /// projection (and be caught by the size check below) instead of silently reusing
        /// the projection of the previous child.
        NamesAndTypes query_node_projection;

        if (const auto * query_node_typed = query_node->as<QueryNode>())
            query_node_projection = query_node_typed->getProjectionColumns();
        else if (const auto * union_node_typed = query_node->as<UnionNode>())
            query_node_projection = union_node_typed->computeProjectionColumns();

        /// Move instead of copy, the local projection is not needed afterwards.
        projections.push_back(std::move(query_node_projection));

        if (projections.back().size() != projections.front().size())
            throw Exception(ErrorCodes::TYPE_MISMATCH, "UNION different number of columns in queries");
    }

    /// After the check above every projection has the same size as the first one.
    const size_t projections_size = projections.size();
    const size_t columns_size = projections.empty() ? 0 : projections.front().size();

    NamesAndTypes result_columns;
    result_columns.reserve(columns_size);

    /// Reused for every column position, holds one type per child.
    DataTypes projection_column_types(projections_size);

    for (size_t column_index = 0; column_index < columns_size; ++column_index)
    {
        for (size_t projection_index = 0; projection_index < projections_size; ++projection_index)
            projection_column_types[projection_index] = projections[projection_index][column_index].type;

        auto result_type = getLeastSupertype(projection_column_types);
        result_columns.emplace_back(projections.front()[column_index].name, std::move(result_type));
    }

    return result_columns;
}
void UnionNode::removeUnusedProjectionColumns(const std::unordered_set<std::string> & used_projection_columns)
{
auto projection_columns = computeProjectionColumns();
size_t projection_columns_size = projection_columns.size();
std::unordered_set<size_t> used_projection_column_indexes;
for (size_t i = 0; i < projection_columns_size; ++i)
{
const auto & projection_column = projection_columns[i];
if (used_projection_columns.contains(projection_column.name))
used_projection_column_indexes.insert(i);
}
auto & query_nodes = getQueries().getNodes();
for (auto & query_node : query_nodes)
{
if (auto * query_node_typed = query_node->as<QueryNode>())
query_node_typed->removeUnusedProjectionColumns(used_projection_column_indexes);
else if (auto * union_node_typed = query_node->as<UnionNode>())
union_node_typed->removeUnusedProjectionColumns(used_projection_column_indexes);
}
}
/** Index-based variant of removeUnusedProjectionColumns.
  *
  * Passes `used_projection_columns_indexes` unchanged to every child: QueryNode children and nested
  * UnionNode children both receive the same set of positions. Children of any other kind are skipped.
  */
void UnionNode::removeUnusedProjectionColumns(const std::unordered_set<size_t> & used_projection_columns_indexes)
{
    for (auto & child : getQueries().getNodes())
    {
        if (auto * nested_query = child->as<QueryNode>())
        {
            nested_query->removeUnusedProjectionColumns(used_projection_columns_indexes);
            continue;
        }

        if (auto * nested_union = child->as<UnionNode>())
            nested_union->removeUnusedProjectionColumns(used_projection_columns_indexes);
    }
}
2022-08-23 09:50:02 +00:00
/** Writes a textual dump of this node into `buffer`.
  *
  * The first line is indented by `indent` spaces and contains the node id, followed by the alias,
  * the is_subquery / is_cte flags and the CTE name (each only when set), and finally the union mode.
  * A "QUERIES" header follows at `indent + 2`, then the queries child is dumped at `indent + 4`.
  */
void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
{
    const std::string node_padding(indent, ' ');
    buffer << node_padding << "UNION id: " << format_state.getNodeId(this);

    if (hasAlias())
        buffer << ", alias: " << getAlias();

    if (is_subquery)
        buffer << ", is_subquery: " << is_subquery;

    if (is_cte)
        buffer << ", is_cte: " << is_cte;

    if (!cte_name.empty())
        buffer << ", cte_name: " << cte_name;

    buffer << ", union_mode: " << toString(union_mode);
    buffer << '\n';

    const std::string section_padding(indent + 2, ' ');
    buffer << section_padding << "QUERIES\n";

    getQueriesNode()->dumpTreeImpl(buffer, format_state, indent + 4);
}
bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs) const
{
const auto & rhs_typed = assert_cast<const UnionNode &>(rhs);
2022-08-31 15:21:17 +00:00
2022-10-19 10:25:27 +00:00
return is_subquery == rhs_typed.is_subquery && is_cte == rhs_typed.is_cte && cte_name == rhs_typed.cte_name &&
union_mode == rhs_typed.union_mode;
2022-08-23 09:50:02 +00:00
}
/** Feeds the node-local state into `state`.
  *
  * The order is: is_subquery, is_cte, the CTE name length, the CTE name itself,
  * and the union mode converted to size_t.
  */
void UnionNode::updateTreeHashImpl(HashState & state) const
{
    /// Subquery and CTE flags.
    state.update(is_subquery);
    state.update(is_cte);

    /// The name is preceded by its length.
    const auto cte_name_size = cte_name.size();
    state.update(cte_name_size);
    state.update(cte_name);

    const auto union_mode_value = static_cast<size_t>(union_mode);
    state.update(union_mode_value);
}
/** Creates a new UnionNode with the same context and union mode
  * and copies is_subquery, is_cte and cte_name into it.
  * Children are not copied by this method.
  */
QueryTreeNodePtr UnionNode::cloneImpl() const
{
    auto cloned = std::make_shared<UnionNode>(context, union_mode);

    cloned->cte_name = cte_name;
    cloned->is_cte = is_cte;
    cloned->is_subquery = is_subquery;

    return cloned;
}
2023-03-14 09:14:58 +00:00
/** Converts this node into an AST.
  *
  * Builds an ASTSelectWithUnionQuery marked as normalized, with this node's union mode and with
  * the AST of the queries child as both its single child and its list_of_selects.
  * If the node is a subquery, the result is wrapped into an ASTSubquery carrying cte_name;
  * otherwise the ASTSelectWithUnionQuery is returned directly.
  */
ASTPtr UnionNode::toASTImpl(const ConvertToASTOptions & options) const
{
    auto union_ast = std::make_shared<ASTSelectWithUnionQuery>();
    union_ast->union_mode = union_mode;
    union_ast->is_normalized = true;

    /// The same AST is referenced from list_of_selects and from the children list.
    auto selects_ast = getQueriesNode()->toAST(options);
    union_ast->list_of_selects = selects_ast;
    union_ast->children.push_back(std::move(selects_ast));

    if (!is_subquery)
        return union_ast;

    auto subquery_ast = std::make_shared<ASTSubquery>();
    subquery_ast->cte_name = cte_name;
    subquery_ast->children.push_back(std::move(union_ast));

    return subquery_ast;
}
}