ClickHouse/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp
2022-10-24 10:22:19 +02:00

2305 lines
91 KiB
C++

#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTSubquery.h>
#include <Core/QueryProcessingStage.h>
#include <Common/FieldVisitorToString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/FieldToDataType.h>
#include <Columns/getLeastSuperColumn.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/IStorage.h>
#include <Analyzer/Utils.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/LambdaNode.h>
#include <Analyzer/TableNode.h>
#include <Analyzer/TableFunctionNode.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/JoinNode.h>
#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/UnionNode.h>
#include <Analyzer/QueryTreeBuilder.h>
#include <Analyzer/QueryTreePassManager.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Functions/FunctionsMiscellaneous.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/CastOverloadResolver.h>
#include <QueryPipeline/Pipe.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Sources/NullSource.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/QueryPlan/JoinStep.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ArrayJoinStep.h>
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/DistinctStep.h>
#include <Processors/QueryPlan/IntersectOrExceptStep.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Interpreters/Context.h>
#include <Interpreters/IJoin.h>
#include <Interpreters/TableJoin.h>
#include <Interpreters/HashJoin.h>
#include <Interpreters/ArrayJoinAction.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNSUPPORTED_METHOD;
extern const int LOGICAL_ERROR;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
extern const int INVALID_JOIN_ON_EXPRESSION;
}
/** ClickHouse query planner.
*
* TODO: JOIN support ASOF. JOIN support strictness. JOIN support constants. JOIN support ON t1.id = t1.id
* TODO: JOIN drop unnecessary columns after ON, USING section
* TODO: Support display names
* TODO: Support RBAC. Support RBAC for ALIAS columns.
* TODO: Support distributed query processing
* TODO: Support PREWHERE
* TODO: Support GROUP BY, HAVING
* TODO: Support ORDER BY, LIMIT
* TODO: Support WINDOW FUNCTIONS
* TODO: Support DISTINCT
* TODO: Support building sets for IN functions
* TODO: Support trivial count optimization
* TODO: Support totals, extremes
* TODO: Support projections
* TODO: Support read in order optimization
* TODO: Simplify actions chain
* TODO: UNION storage limits
* TODO: Interpreter resources
*/
namespace
{
[[maybe_unused]] String dumpQueryPlan(QueryPlan & query_plan)
{
WriteBufferFromOwnString query_plan_buffer;
query_plan.explainPlan(query_plan_buffer, QueryPlan::ExplainPlanOptions{true, true, true, true});
return query_plan_buffer.str();
}
[[maybe_unused]] String dumpQueryPipeline(QueryPlan & query_plan)
{
QueryPlan::ExplainPipelineOptions explain_pipeline;
WriteBufferFromOwnString query_pipeline_buffer;
query_plan.explainPipeline(query_pipeline_buffer, explain_pipeline);
return query_pipeline_buffer.str();
}
Block getCommonHeaderForUnion(const Blocks & headers)
{
size_t num_selects = headers.size();
Block common_header = headers.front();
size_t num_columns = common_header.columns();
for (size_t query_num = 1; query_num < num_selects; ++query_num)
{
if (headers[query_num].columns() != num_columns)
throw Exception(ErrorCodes::TYPE_MISMATCH,
"Different number of columns in UNION elements: {} and {}",
common_header.dumpNames(),
headers[query_num].dumpNames());
}
std::vector<const ColumnWithTypeAndName *> columns(num_selects);
for (size_t column_num = 0; column_num < num_columns; ++column_num)
{
for (size_t i = 0; i < num_selects; ++i)
columns[i] = &headers[i].getByPosition(column_num);
ColumnWithTypeAndName & result_elem = common_header.getByPosition(column_num);
result_elem = getLeastSuperColumn(columns);
}
return common_header;
}
class ActionsChainNode;
using ActionsChainNodePtr = std::unique_ptr<ActionsChainNode>;
using ActionsChainNodes = std::vector<ActionsChainNodePtr>;
class ActionsChainNode
{
public:
explicit ActionsChainNode(ActionsDAGPtr actions_, bool available_output_columns_only_aliases_ = false)
: actions(std::move(actions_))
, available_output_columns_only_aliases(available_output_columns_only_aliases_)
{
initialize();
}
[[maybe_unused]] ActionsDAGPtr & getActions()
{
return actions;
}
[[maybe_unused]] const ActionsDAGPtr & getActions() const
{
return actions;
}
const ColumnsWithTypeAndName & getAvailableOutputColumns() const
{
return available_output_columns;
}
const NameSet & getInputColumnNames() const
{
return input_columns_names;
}
const NameSet & getChildRequiredOutputColumnsNames() const
{
return child_required_output_columns_names;
}
void finalizeInputAndOutputColumns(NameSet & child_input_columns)
{
child_required_output_columns_names.clear();
std::vector<const ActionsDAG::Node *> required_output_nodes;
for (const auto & node : actions->getNodes())
{
auto it = child_input_columns.find(node.result_name);
if (it == child_input_columns.end())
continue;
child_required_output_columns_names.insert(node.result_name);
required_output_nodes.push_back(&node);
child_input_columns.erase(it);
}
for (auto & required_output_node : required_output_nodes)
actions->addOrReplaceInOutputs(*required_output_node);
actions->removeUnusedActions();
initialize();
}
void dump(WriteBuffer & buffer) const
{
buffer << "DAG" << '\n';
buffer << actions->dumpDAG();
if (!child_required_output_columns_names.empty())
{
buffer << "Child required output columns " << boost::join(child_required_output_columns_names, ", ");
buffer << '\n';
}
}
[[maybe_unused]] String dump() const
{
WriteBufferFromOwnString buffer;
dump(buffer);
return buffer.str();
}
[[maybe_unused]] void addParentIndex(size_t parent_node_index)
{
parent_nodes_indices.push_back(parent_node_index);
}
void addParentIndices(const std::vector<size_t> & parent_nodes_indices_value)
{
parent_nodes_indices.insert(parent_nodes_indices.end(), parent_nodes_indices_value.begin(), parent_nodes_indices_value.end());
}
const std::vector<size_t> & getParentNodesIndices() const
{
return parent_nodes_indices;
}
private:
void initialize()
{
auto required_columns_names = actions->getRequiredColumnsNames();
input_columns_names = NameSet(required_columns_names.begin(), required_columns_names.end());
available_output_columns.clear();
for (const auto & node : actions->getNodes())
{
if (available_output_columns_only_aliases)
{
if (node.type == ActionsDAG::ActionType::ALIAS)
available_output_columns.emplace_back(node.column, node.result_type, node.result_name);
continue;
}
if (node.type == ActionsDAG::ActionType::INPUT ||
node.type == ActionsDAG::ActionType::FUNCTION ||
node.type == ActionsDAG::ActionType::ARRAY_JOIN)
available_output_columns.emplace_back(node.column, node.result_type, node.result_name);
}
}
ActionsDAGPtr actions;
bool available_output_columns_only_aliases;
NameSet input_columns_names;
NameSet child_required_output_columns_names;
ColumnsWithTypeAndName available_output_columns;
std::vector<size_t> parent_nodes_indices;
};
class ActionsChain
{
public:
void addNode(ActionsChainNodePtr node)
{
nodes.emplace_back(std::move(node));
}
[[maybe_unused]] const ActionsChainNodes & getNodes() const
{
return nodes;
}
ColumnsWithTypeAndName getAvailableOutputColumns(const std::vector<size_t> & nodes_indices)
{
ColumnsWithTypeAndName result;
for (const auto & node_index : nodes_indices)
{
assert(node_index < nodes.size());
const auto & node_available_output_columns = nodes[node_index]->getAvailableOutputColumns();
result.insert(result.end(), node_available_output_columns.begin(), node_available_output_columns.end());
}
return result;
}
// ColumnsWithTypeAndName getOutputColumns(const std::vector<size_t> & nodes_indices)
// {
// ColumnsWithTypeAndName result;
// for (const auto & node_index : nodes_indices)
// {
// assert(node_index < nodes.size());
// const auto & node_output_columns = nodes[node_index]->getActions()->getResultColumns();
// result.insert(result.end(), node_output_columns.begin(), node_output_columns.end());
// }
// return result;
// }
[[maybe_unused]] NameSet getInputColumnNames(const std::vector<size_t> & nodes_indices)
{
NameSet result;
for (const auto & node_index : nodes_indices)
{
assert(node_index < nodes.size());
const auto & node_input_column_names = nodes[node_index]->getInputColumnNames();
result.insert(node_input_column_names.begin(), node_input_column_names.end());
}
return result;
}
[[maybe_unused]] size_t size() const
{
return nodes.size();
}
[[maybe_unused]] const ActionsChainNodePtr & at(size_t index) const
{
if (index >= nodes.size())
throw std::out_of_range("actions chain access is out of range");
return nodes[index];
}
[[maybe_unused]] ActionsChainNodePtr & at(size_t index)
{
if (index >= nodes.size())
throw std::out_of_range("actions chain access is out of range");
return nodes[index];
}
[[maybe_unused]] ActionsChainNodePtr & operator[](size_t index)
{
return nodes[index];
}
[[maybe_unused]] const ActionsChainNodePtr & operator[](size_t index) const
{
return nodes[index];
}
[[maybe_unused]] ActionsChainNode * getLastNode()
{
return nodes.back().get();
}
[[maybe_unused]] ActionsChainNode * getLastNodeOrThrow()
{
if (nodes.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "ActionsChain is empty");
return nodes.back().get();
}
size_t getLastNodeIndex()
{
return nodes.size() - 1;
}
[[maybe_unused]] size_t getLastNodeIndexOrThrow()
{
if (nodes.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "ActionsChain is empty");
return nodes.size() - 1;
}
void finalize()
{
if (nodes.empty())
return;
std::deque<size_t> nodes_indices_to_process;
nodes_indices_to_process.push_front(nodes.size() - 1);
/// For root node there are no columns required in child nodes
NameSet empty_child_input_columns;
nodes.back().get()->finalizeInputAndOutputColumns(empty_child_input_columns);
while (!nodes_indices_to_process.empty())
{
auto node_index_to_process = nodes_indices_to_process.front();
nodes_indices_to_process.pop_front();
auto & node_to_process = nodes[node_index_to_process];
const auto & parent_nodes_indices = node_to_process->getParentNodesIndices();
auto input_columns_names_copy = node_to_process->getInputColumnNames();
for (const auto & parent_node_index : parent_nodes_indices)
{
assert(parent_node_index < nodes.size());
auto & parent_node = nodes[parent_node_index];
parent_node->finalizeInputAndOutputColumns(input_columns_names_copy);
nodes_indices_to_process.push_back(parent_node_index);
}
}
}
void dump(WriteBuffer & buffer) const
{
size_t nodes_size = nodes.size();
for (size_t i = 0; i < nodes_size; ++i)
{
const auto & node = nodes[i];
buffer << "Node " << i;
const auto & parent_nodes_indices = node->getParentNodesIndices();
if (!parent_nodes_indices.empty())
{
buffer << " parent nodes indices ";
for (const auto & parent_node_index : parent_nodes_indices)
buffer << parent_node_index << ' ';
}
buffer << '\n';
node->dump(buffer);
buffer << '\n';
}
}
[[maybe_unused]] String dump() const
{
WriteBufferFromOwnString buffer;
dump(buffer);
return buffer.str();
}
private:
ActionsChainNodes nodes;
};
using ColumnIdentifier = std::string;
struct TableExpressionColumns
{
/// Valid for table, table function, query table expression nodes
NamesAndTypesList source_columns;
/// Valid for table, table function, query table expression nodes
NameSet source_columns_names;
/// Valid only for table table expression node
NameSet alias_columns;
/// Valid for table, table function, query table expression nodes
std::unordered_map<std::string, ColumnIdentifier> column_name_to_column_identifier;
};
using TableExpressionNodeToColumns = std::unordered_map<const IQueryTreeNode *, TableExpressionColumns>;
struct PlannerContext
{
std::unordered_map<const IQueryTreeNode *, ColumnIdentifier> column_node_to_column_identifier;
std::unordered_map<const IQueryTreeNode *, std::string> table_expression_node_to_identifier;
TableExpressionNodeToColumns table_expression_node_to_columns;
size_t column_identifier_counter = 0;
ActionsChain actions_chain;
ActionsDAGPtr where_actions;
size_t where_actions_chain_node_index = 0;
std::string where_action_node_name;
ActionsDAGPtr projection_actions;
ContextPtr query_context;
ColumnIdentifier getColumnUniqueIdentifier(const IQueryTreeNode * column_source_node, std::string column_name = {})
{
auto column_unique_prefix = "__column_" + std::to_string(column_identifier_counter);
++column_identifier_counter;
std::string table_expression_identifier;
auto table_expression_identifier_it = table_expression_node_to_identifier.find(column_source_node);
if (table_expression_identifier_it != table_expression_node_to_identifier.end())
table_expression_identifier = table_expression_identifier_it->second;
std::string debug_identifier_suffix;
if (column_source_node->hasAlias())
{
debug_identifier_suffix += column_source_node->getAlias();
}
else if (const auto * table_source_node = column_source_node->as<TableNode>())
{
debug_identifier_suffix += table_source_node->getStorageID().getFullNameNotQuoted();
}
else
{
auto column_source_node_type = column_source_node->getNodeType();
if (column_source_node_type == QueryTreeNodeType::JOIN)
debug_identifier_suffix += "join";
else if (column_source_node_type == QueryTreeNodeType::ARRAY_JOIN)
debug_identifier_suffix += "array_join";
else if (column_source_node_type == QueryTreeNodeType::TABLE_FUNCTION)
debug_identifier_suffix += "table_function";
else if (column_source_node_type == QueryTreeNodeType::QUERY)
debug_identifier_suffix += "subquery";
if (!table_expression_identifier.empty())
debug_identifier_suffix += '_' + table_expression_identifier;
}
if (!column_name.empty())
debug_identifier_suffix += '.' + column_name;
if (!debug_identifier_suffix.empty())
column_unique_prefix += '_' + debug_identifier_suffix;
return column_unique_prefix;
}
ColumnIdentifier getColumnIdentifierOrThrow(const IQueryTreeNode * column_source_node)
{
assert(column_source_node->getNodeType() == QueryTreeNodeType::COLUMN);
auto it = column_node_to_column_identifier.find(column_source_node);
if (it == column_node_to_column_identifier.end())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Column identifier is not initialized for column {}",
column_source_node->formatASTForErrorMessage());
return it->second;
}
};
struct QueryTreeActionsScopeNode
{
explicit QueryTreeActionsScopeNode(ActionsDAGPtr actions_dag_, QueryTreeNodePtr scope_node_)
: actions_dag(std::move(actions_dag_))
, scope_node(std::move(scope_node_))
{
for (const auto & node : actions_dag->getNodes())
node_name_to_node[node.result_name] = &node;
}
[[maybe_unused]] bool containsNode(const std::string & node_name)
{
return node_name_to_node.find(node_name) != node_name_to_node.end();
}
[[maybe_unused]] const ActionsDAG::Node * tryGetNode(const std::string & node_name)
{
auto it = node_name_to_node.find(node_name);
if (it == node_name_to_node.end())
return {};
return it->second;
}
const ActionsDAG::Node * getNodeOrThrow(const std::string & node_name)
{
auto it = node_name_to_node.find(node_name);
if (it == node_name_to_node.end())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"No node with name {}. There are only nodes {}",
node_name,
actions_dag->dumpNames());
return it->second;
}
const ActionsDAG::Node * addInputColumnIfNecessary(const std::string & node_name, const DataTypePtr & column_type)
{
auto it = node_name_to_node.find(node_name);
if (it != node_name_to_node.end())
return it->second;
const auto * node = &actions_dag->addInput(node_name, column_type);
// std::cout << "QueryTreeActionsScopeNode::addInputColumnIfNecessary dag " << actions_dag << " node name " << node_name;
// std::cout << " result node ptr " << node << std::endl;
node_name_to_node[node->result_name] = node;
return node;
}
const ActionsDAG::Node * addInputConstantColumnIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column)
{
auto it = node_name_to_node.find(node_name);
if (it != node_name_to_node.end())
return it->second;
const auto * node = &actions_dag->addInput(column);
node_name_to_node[node->result_name] = node;
return node;
}
const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column)
{
auto it = node_name_to_node.find(node_name);
if (it != node_name_to_node.end())
return it->second;
const auto * node = &actions_dag->addColumn(column);
// std::cout << "QueryTreeActionsScopeNode::addConstantIfNecessary dag " << actions_dag << " node name " << node_name;
// std::cout << " result node ptr " << node << std::endl;
node_name_to_node[node->result_name] = node;
return node;
}
const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, FunctionOverloadResolverPtr function)
{
auto it = node_name_to_node.find(node_name);
if (it != node_name_to_node.end())
return it->second;
const auto * node = &actions_dag->addFunction(function, children, node_name);
// std::cout << "QueryTreeActionsScopeNode::addFunctionIfNecessary dag " << actions_dag << " node name " << node_name;
// std::cout << " result node ptr " << node << std::endl;
node_name_to_node[node->result_name] = node;
return node;
}
const ActionsDAG::Node * addArrayJoinIfNecessary(const std::string & node_name, const ActionsDAG::Node * child)
{
auto it = node_name_to_node.find(node_name);
if (it != node_name_to_node.end())
return it->second;
const auto * node = &actions_dag->addArrayJoin(*child, node_name);
node_name_to_node[node->result_name] = node;
return node;
}
std::unordered_map<std::string_view, const ActionsDAG::Node *> node_name_to_node;
ActionsDAGPtr actions_dag;
QueryTreeNodePtr scope_node;
};
class QueryTreeActionsVisitor
{
public:
explicit QueryTreeActionsVisitor(ActionsDAGPtr actions_dag, const PlannerContext & planner_context_)
: planner_context(planner_context_)
{
actions_stack.emplace_back(std::move(actions_dag), nullptr);
}
ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node)
{
ActionsDAG::NodeRawConstPtrs result;
if (auto * expression_list_node = expression_node->as<ListNode>())
{
for (auto & node : expression_list_node->getNodes())
{
auto [node_name, _] = visitImpl(node);
result.push_back(actions_stack.front().getNodeOrThrow(node_name));
}
}
else
{
auto [node_name, _] = visitImpl(expression_node);
result.push_back(actions_stack.front().getNodeOrThrow(node_name));
}
return result;
}
private:
using NodeNameAndNodeMinLevel = std::pair<std::string, size_t>;
NodeNameAndNodeMinLevel visitImpl(QueryTreeNodePtr node)
{
if (auto * column_node = node->as<ColumnNode>())
return visitColumn(node);
else if (auto * constant_node = node->as<ConstantNode>())
return visitConstant(node);
else if (auto * function_node = node->as<FunctionNode>())
return visitFunction(node);
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected only column, constant or function node. Actual {}", node->formatASTForErrorMessage());
}
NodeNameAndNodeMinLevel visitColumn(const QueryTreeNodePtr & node)
{
auto column_node_name = getActionsDAGNodeName(node.get());
const auto & column_node = node->as<ColumnNode &>();
Int64 actions_stack_size = static_cast<Int64>(actions_stack.size() - 1);
for (Int64 i = actions_stack_size; i >= 0; --i)
{
actions_stack[i].addInputColumnIfNecessary(column_node_name, column_node.getColumnType());
if (column_node.getColumnSource()->getNodeType() == QueryTreeNodeType::LAMBDA &&
actions_stack[i].scope_node.get() == column_node.getColumnSource().get())
{
return {column_node_name, i};
}
}
return {column_node_name, 0};
}
NodeNameAndNodeMinLevel visitConstant(const QueryTreeNodePtr & node)
{
auto constant_node_name = getActionsDAGNodeName(node.get());
const auto & constant_node = node->as<ConstantNode &>();
const auto & literal = constant_node.getConstantValue();
ColumnWithTypeAndName column;
column.name = constant_node_name;
column.type = constant_node.getResultType();
column.column = column.type->createColumnConst(1, literal);
actions_stack[0].addConstantIfNecessary(constant_node_name, column);
size_t actions_stack_size = actions_stack.size();
for (size_t i = 1; i < actions_stack_size; ++i)
{
auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column);
}
return {constant_node_name, 0};
}
NodeNameAndNodeMinLevel visitLambda(const QueryTreeNodePtr & node)
{
auto & lambda_node = node->as<LambdaNode &>();
auto result_type = lambda_node.getResultType();
if (!result_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Lambda {} is not resolved during query analysis",
lambda_node.formatASTForErrorMessage());
NamesAndTypesList lambda_arguments_names_and_types;
for (auto & lambda_node_argument : lambda_node.getArguments().getNodes())
{
auto lambda_argument_name = lambda_node_argument->getName();
auto lambda_argument_type = lambda_node_argument->getResultType();
lambda_arguments_names_and_types.emplace_back(lambda_argument_name, lambda_argument_type);
}
size_t previous_scope_node_actions_stack_index = actions_stack.size() - 1;
auto lambda_actions_dag = std::make_shared<ActionsDAG>();
actions_stack.emplace_back(lambda_actions_dag, node);
auto [lambda_expression_node_name, level] = visitImpl(lambda_node.getExpression());
lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name));
lambda_actions_dag->removeUnusedActions(Names(1, lambda_expression_node_name));
auto lambda_actions = std::make_shared<ExpressionActions>(
lambda_actions_dag, ExpressionActionsSettings::fromContext(planner_context.query_context, CompileExpressions::yes));
Names captured_column_names;
ActionsDAG::NodeRawConstPtrs lambda_children;
Names required_column_names = lambda_actions->getRequiredColumns();
const auto & lambda_argument_names = lambda_node.getArgumentNames();
for (const auto & required_column_name : required_column_names)
{
auto it = std::find_if(
lambda_argument_names.begin(), lambda_argument_names.end(), [&](auto & value) { return value == required_column_name; });
if (it == lambda_argument_names.end())
{
lambda_children.push_back(actions_stack[previous_scope_node_actions_stack_index].getNodeOrThrow(required_column_name));
captured_column_names.push_back(required_column_name);
}
}
auto lambda_node_name = getActionsDAGNodeName(node.get());
auto function_capture = std::make_shared<FunctionCaptureOverloadResolver>(
lambda_actions, captured_column_names, lambda_arguments_names_and_types, result_type, lambda_expression_node_name);
actions_stack.pop_back();
if (level == actions_stack.size())
--level;
actions_stack[level].addFunctionIfNecessary(lambda_node_name, lambda_children, function_capture);
size_t actions_stack_size = actions_stack.size();
for (size_t i = level + 1; i < actions_stack_size; ++i)
{
auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputColumnIfNecessary(lambda_node_name, result_type);
}
return {lambda_node_name, level};
}
NodeNameAndNodeMinLevel visitFunction(const QueryTreeNodePtr & node)
{
auto function_node_name = getActionsDAGNodeName(node.get());
const auto & function_node = node->as<FunctionNode &>();
if (function_node.getFunctionName() == "grouping")
{
size_t arguments_size = function_node.getArguments().getNodes().size();
if (arguments_size == 0)
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function GROUPING expects at least one argument");
else if (arguments_size > 64)
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Function GROUPING can have up to 64 arguments, but {} provided", arguments_size);
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Function GROUPING is not supported");
}
else if (isNameOfInFunction(function_node.getFunctionName()))
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Function IN is not supported");
}
if (function_node.isAggregateFunction())
{
size_t actions_stack_size = actions_stack.size();
for (size_t i = 0; i < actions_stack_size; ++i)
{
auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType());
}
return {function_node_name, 0};
}
const auto & function_arguments = function_node.getArguments().getNodes();
size_t function_arguments_size = function_arguments.size();
Names function_arguments_node_names;
function_arguments_node_names.reserve(function_arguments_size);
size_t level = 0;
for (const auto & argument : function_arguments)
{
if (argument->getNodeType() == QueryTreeNodeType::LAMBDA)
{
auto [node_name, node_min_level] = visitLambda(argument);
function_arguments_node_names.push_back(std::move(node_name));
level = std::max(level, node_min_level);
continue;
}
auto [node_name, node_min_level] = visitImpl(argument);
function_arguments_node_names.push_back(std::move(node_name));
level = std::max(level, node_min_level);
}
ActionsDAG::NodeRawConstPtrs children;
children.reserve(function_arguments_size);
for (auto & function_argument_node_name : function_arguments_node_names)
children.push_back(actions_stack[level].getNodeOrThrow(function_argument_node_name));
if (function_node.getFunctionName() == "arrayJoin")
{
if (level != 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Expression in arrayJoin cannot depend on lambda argument: {} ",
function_arguments_node_names.at(0));
actions_stack[level].addArrayJoinIfNecessary(function_node_name, children.at(0));
}
else
{
actions_stack[level].addFunctionIfNecessary(function_node_name, children, function_node.getFunction());
}
size_t actions_stack_size = actions_stack.size();
for (size_t i = level + 1; i < actions_stack_size; ++i)
{
auto & actions_stack_node = actions_stack[i];
actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType());
}
return {function_node_name, level};
}
String getActionsDAGNodeName(const IQueryTreeNode * node) const
{
String result;
auto node_type = node->getNodeType();
switch (node_type)
{
case QueryTreeNodeType::COLUMN:
{
auto it = planner_context.column_node_to_column_identifier.find(node);
if (it == planner_context.column_node_to_column_identifier.end())
result = node->getName();
else
result = it->second;
break;
}
case QueryTreeNodeType::CONSTANT:
{
result = "__constant_" + node->getName();
break;
}
case QueryTreeNodeType::FUNCTION:
{
const auto & function_node = node->as<FunctionNode &>();
WriteBufferFromOwnString buffer;
buffer << "__function_" + function_node.getFunctionName();
const auto & function_parameters_nodes = function_node.getParameters().getNodes();
if (!function_parameters_nodes.empty())
{
buffer << '(';
size_t function_parameters_nodes_size = function_parameters_nodes.size();
for (size_t i = 0; i < function_parameters_nodes_size; ++i)
{
const auto & function_parameter_node = function_parameters_nodes[i];
getActionsDAGNodeName(function_parameter_node.get());
if (i + 1 != function_parameters_nodes_size)
buffer << ", ";
}
buffer << ')';
}
const auto & function_arguments_nodes = function_node.getArguments().getNodes();
buffer << '(';
size_t function_arguments_nodes_size = function_arguments_nodes.size();
for (size_t i = 0; i < function_arguments_nodes_size; ++i)
{
const auto & function_argument_node = function_arguments_nodes[i];
buffer << getActionsDAGNodeName(function_argument_node.get());
if (i + 1 != function_arguments_nodes_size)
buffer << ", ";
}
buffer << ')';
result = buffer.str();
break;
}
case QueryTreeNodeType::QUERY:
{
auto query_hash = node->getTreeHash();
result = "__subquery_" + std::to_string(query_hash.first) + '_' + std::to_string(query_hash.second);
break;
}
case QueryTreeNodeType::LAMBDA:
{
auto lambda_hash = node->getTreeHash();
result = "__lambda_" + toString(lambda_hash.first) + '_' + toString(lambda_hash.second);
break;
}
default:
{
result = node->getName();
break;
}
}
return result;
}
std::vector<QueryTreeActionsScopeNode> actions_stack;
const PlannerContext & planner_context;
};
class CollectTableExpressionIdentifiersVisitor
{
public:
void visit(const QueryTreeNodePtr & join_tree_node, PlannerContext & planner_context)
{
auto join_tree_node_type = join_tree_node->getNodeType();
switch (join_tree_node_type)
{
case QueryTreeNodeType::QUERY:
[[fallthrough]];
case QueryTreeNodeType::UNION:
[[fallthrough]];
case QueryTreeNodeType::TABLE:
[[fallthrough]];
case QueryTreeNodeType::TABLE_FUNCTION:
{
std::string table_expression_identifier = std::to_string(planner_context.table_expression_node_to_identifier.size());
planner_context.table_expression_node_to_identifier.emplace(join_tree_node.get(), table_expression_identifier);
break;
}
case QueryTreeNodeType::JOIN:
{
auto & join_node = join_tree_node->as<JoinNode &>();
visit(join_node.getLeftTableExpression(), planner_context);
std::string table_expression_identifier = std::to_string(planner_context.table_expression_node_to_identifier.size());
planner_context.table_expression_node_to_identifier.emplace(join_tree_node.get(), table_expression_identifier);
visit(join_node.getRightTableExpression(), planner_context);
break;
}
case QueryTreeNodeType::ARRAY_JOIN:
{
auto & array_join_node = join_tree_node->as<ArrayJoinNode &>();
visit(array_join_node.getTableExpression(), planner_context);
std::string table_expression_identifier = std::to_string(planner_context.table_expression_node_to_identifier.size());
planner_context.table_expression_node_to_identifier.emplace(join_tree_node.get(), table_expression_identifier);
break;
}
default:
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected query, table, table function, join or array join query node. Actual {}",
join_tree_node->formatASTForErrorMessage());
}
}
}
};
class CollectSourceColumnsMatcher
{
public:
using Visitor = InDepthQueryTreeVisitor<CollectSourceColumnsMatcher, true, false>;
struct Data
{
PlannerContext & planner_context;
};
static void visit(QueryTreeNodePtr & node, Data & data)
{
auto * column_node = node->as<ColumnNode>();
if (!column_node)
return;
auto column_source_node = column_node->getColumnSource();
auto column_source_node_type = column_source_node->getNodeType();
if (column_source_node_type == QueryTreeNodeType::ARRAY_JOIN ||
column_source_node_type == QueryTreeNodeType::LAMBDA)
return;
/// JOIN using expression
if (column_node->hasExpression() && column_source_node->getNodeType() == QueryTreeNodeType::JOIN)
return;
auto & table_expression_node_to_columns = data.planner_context.table_expression_node_to_columns;
auto & table_expression_column_node_to_column_identifier = data.planner_context.column_node_to_column_identifier;
auto [it, _] = table_expression_node_to_columns.emplace(column_source_node.get(), TableExpressionColumns());
auto & table_expression_columns = it->second;
if (column_node->hasExpression())
{
/// Replace ALIAS column with expression
table_expression_columns.alias_columns.insert(column_node->getColumnName());
node = column_node->getExpression();
visit(node, data);
return;
}
if (column_source_node_type != QueryTreeNodeType::TABLE &&
column_source_node_type != QueryTreeNodeType::TABLE_FUNCTION &&
column_source_node_type != QueryTreeNodeType::QUERY &&
column_source_node_type != QueryTreeNodeType::UNION)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected table, table function, query or union column source. Actual {}",
column_source_node->formatASTForErrorMessage());
auto [source_columns_set_it, inserted] = it->second.source_columns_names.insert(column_node->getColumnName());
if (inserted)
{
auto column_identifier = data.planner_context.getColumnUniqueIdentifier(column_source_node.get(), column_node->getColumnName());
table_expression_column_node_to_column_identifier.emplace(column_node, column_identifier);
it->second.column_name_to_column_identifier.emplace(column_node->getColumnName(), column_identifier);
it->second.source_columns.emplace_back(column_node->getColumn());
}
else
{
auto column_identifier_it = it->second.column_name_to_column_identifier.find(column_node->getColumnName());
if (column_identifier_it == it->second.column_name_to_column_identifier.end())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Column node {} column identifier is not initialized",
column_node->formatASTForErrorMessage());
table_expression_column_node_to_column_identifier.emplace(column_node, column_identifier_it->second);
}
}
static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
{
return child_node->getNodeType() != QueryTreeNodeType::QUERY;
}
};
using CollectSourceColumnsVisitor = CollectSourceColumnsMatcher::Visitor;
ActionsDAGPtr convertExpressionNodeIntoDAG(const QueryTreeNodePtr & expression_node, const ColumnsWithTypeAndName & inputs, const PlannerContext & planner_context)
{
ActionsDAGPtr action_dag = std::make_shared<ActionsDAG>(inputs);
QueryTreeActionsVisitor actions_visitor(action_dag, planner_context);
auto expression_dag_index_nodes = actions_visitor.visit(expression_node);
action_dag->getOutputs().clear();
for (auto & expression_dag_index_node : expression_dag_index_nodes)
action_dag->getOutputs().push_back(expression_dag_index_node);
return action_dag;
}
QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node,
SelectQueryInfo & select_query_info,
const SelectQueryOptions & select_query_options,
PlannerContext & planner_context);
QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression,
SelectQueryInfo & table_expression_query_info,
const SelectQueryOptions & select_query_options,
PlannerContext & planner_context)
{
auto * table_node = table_expression->as<TableNode>();
auto * table_function_node = table_expression->as<TableFunctionNode>();
auto * query_node = table_expression->as<QueryNode>();
auto * union_node = table_expression->as<UnionNode>();
QueryPlan query_plan;
/** Use default columns to support case when there are no columns in query.
* Example: SELECT 1;
*/
const auto & [it, _] = planner_context.table_expression_node_to_columns.emplace(table_expression.get(), TableExpressionColumns());
auto & table_expression_columns = it->second;
if (table_node || table_function_node)
{
const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage();
const auto & storage_snapshot = table_node ? table_node->getStorageSnapshot() : table_function_node->getStorageSnapshot();
auto from_stage = storage->getQueryProcessingStage(planner_context.query_context, select_query_options.to_stage, storage_snapshot, table_expression_query_info);
Names column_names(table_expression_columns.source_columns_names.begin(), table_expression_columns.source_columns_names.end());
std::optional<NameAndTypePair> read_additional_column;
bool plan_has_multiple_table_expressions = planner_context.table_expression_node_to_columns.size() > 1;
if (column_names.empty() && (plan_has_multiple_table_expressions || storage->getName() == "SystemOne"))
{
auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns());
read_additional_column = column_names_and_types.front();
}
if (read_additional_column)
{
auto column_identifier = planner_context.getColumnUniqueIdentifier(table_expression.get(), read_additional_column->name);
column_names.push_back(read_additional_column->name);
table_expression_columns.source_columns_names.emplace(read_additional_column->name);
table_expression_columns.source_columns.emplace_back(*read_additional_column);
table_expression_columns.column_name_to_column_identifier.emplace(read_additional_column->name, column_identifier);
}
if (!column_names.empty())
{
size_t max_block_size = planner_context.query_context->getSettingsRef().max_block_size;
size_t max_streams = planner_context.query_context->getSettingsRef().max_threads;
storage->read(query_plan, column_names, storage_snapshot, table_expression_query_info, planner_context.query_context, from_stage, max_block_size, max_streams);
}
/// Create step which reads from empty source if storage has no data.
if (!query_plan.isInitialized())
{
auto source_header = storage_snapshot->getSampleBlockForColumns(column_names);
Pipe pipe(std::make_shared<NullSource>(source_header));
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
read_from_pipe->setStepDescription("Read from NullSource");
query_plan.addStep(std::move(read_from_pipe));
}
}
else if (query_node || union_node)
{
InterpreterSelectQueryAnalyzer interpeter(table_expression, select_query_options, planner_context.query_context);
interpeter.initializeQueryPlanIfNeeded();
query_plan = std::move(interpeter).extractQueryPlan();
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function, query or union. Actual {}", table_expression->formatASTForErrorMessage());
}
auto rename_actions_dag = std::make_shared<ActionsDAG>(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
for (const auto & [column_name, column_identifier] : table_expression_columns.column_name_to_column_identifier)
{
auto position = query_plan.getCurrentDataStream().header.getPositionByName(column_name);
const auto * node_to_rename = rename_actions_dag->getOutputs()[position];
rename_actions_dag->getOutputs()[position] = &rename_actions_dag->addAlias(*node_to_rename, column_identifier);
}
auto rename_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), rename_actions_dag);
rename_step->setStepDescription("Change column names to column identifiers");
query_plan.addStep(std::move(rename_step));
return query_plan;
}
class JoinClause
{
public:
void addKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node)
{
left_key_nodes.emplace_back(left_key_node);
right_key_nodes.emplace_back(right_key_node);
}
void addCondition(JoinTableSide table_side, const ActionsDAG::Node * condition_node)
{
auto & filter_condition_nodes = table_side == JoinTableSide::Left ? left_filter_condition_nodes : right_filter_condition_nodes;
filter_condition_nodes.push_back(condition_node);
}
const ActionsDAG::NodeRawConstPtrs & getLeftKeyNodes() const
{
return left_key_nodes;
}
const ActionsDAG::NodeRawConstPtrs & getRightKeyNodes() const
{
return right_key_nodes;
}
ActionsDAG::NodeRawConstPtrs & getLeftKeyNodes()
{
return left_key_nodes;
}
ActionsDAG::NodeRawConstPtrs & getRightKeyNodes()
{
return right_key_nodes;
}
const ActionsDAG::NodeRawConstPtrs & getLeftFilterConditionNodes() const
{
return left_filter_condition_nodes;
}
const ActionsDAG::NodeRawConstPtrs & getRightFilterConditionNodes() const
{
return right_filter_condition_nodes;
}
void clearConditionNodes(JoinTableSide table_side)
{
auto & filter_condition_nodes = table_side == JoinTableSide::Left ? left_filter_condition_nodes : right_filter_condition_nodes;
filter_condition_nodes.clear();
}
void dump(WriteBuffer & buffer) const
{
auto dump_dag_nodes = [&](const ActionsDAG::NodeRawConstPtrs & dag_nodes)
{
String dag_nodes_dump;
if (!dag_nodes.empty())
{
for (const auto & dag_node : dag_nodes)
{
dag_nodes_dump += dag_node->result_name;
dag_nodes_dump += ", ";
}
dag_nodes_dump.pop_back();
dag_nodes_dump.pop_back();
}
return dag_nodes_dump;
};
buffer << "left_key_nodes: " << dump_dag_nodes(left_key_nodes);
buffer << " right_key_nodes: " << dump_dag_nodes(right_key_nodes);
if (!left_filter_condition_nodes.empty())
buffer << " left_condition_nodes: " + dump_dag_nodes(left_filter_condition_nodes);
if (!right_filter_condition_nodes.empty())
buffer << " left_condition_nodes: " + dump_dag_nodes(right_filter_condition_nodes);
}
[[maybe_unused]] String dump() const
{
WriteBufferFromOwnString buffer;
dump(buffer);
return buffer.str();
}
private:
ActionsDAG::NodeRawConstPtrs left_key_nodes;
ActionsDAG::NodeRawConstPtrs right_key_nodes;
ActionsDAG::NodeRawConstPtrs left_filter_condition_nodes;
ActionsDAG::NodeRawConstPtrs right_filter_condition_nodes;
};
using JoinClauses = std::vector<JoinClause>;
std::optional<JoinTableSide> extractJoinTableSideFromExpression(const ActionsDAG::Node * expression_root_node,
const NameSet & left_table_expression_columns_names,
const NameSet & right_table_expression_columns_names,
const JoinNode & join_node)
{
std::optional<JoinTableSide> table_side;
std::vector<const ActionsDAG::Node *> nodes_to_process;
nodes_to_process.push_back(expression_root_node);
while (!nodes_to_process.empty())
{
const auto * node_to_process = nodes_to_process.back();
nodes_to_process.pop_back();
for (const auto & child : node_to_process->children)
nodes_to_process.push_back(child);
if (node_to_process->type != ActionsDAG::ActionType::INPUT)
continue;
const auto & input_name = node_to_process->result_name;
bool left_table_expression_contains_input = left_table_expression_columns_names.contains(input_name);
bool right_table_expression_contains_input = right_table_expression_columns_names.contains(input_name);
if (!left_table_expression_contains_input && !right_table_expression_contains_input)
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"JOIN {} actions has column {} that do not exist in left {} or right {} table expression columns",
join_node.formatASTForErrorMessage(),
input_name,
boost::join(left_table_expression_columns_names, ", "),
boost::join(right_table_expression_columns_names, ", "));
auto input_table_side = left_table_expression_contains_input ? JoinTableSide::Left : JoinTableSide::Right;
if (table_side && (*table_side) != input_table_side)
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"JOIN {} join expression contains column from left and right table",
join_node.formatASTForErrorMessage());
table_side = input_table_side;
}
return table_side;
}
void buildJoinClause(ActionsDAGPtr join_expression_dag,
const ActionsDAG::Node * join_expressions_actions_node,
const NameSet & left_table_expression_columns_names,
const NameSet & right_table_expression_columns_names,
const JoinNode & join_node,
JoinClause & join_clause)
{
/// For and function go into children
if (join_expressions_actions_node->function && join_expressions_actions_node->function->getName() == "and")
{
for (const auto & child : join_expressions_actions_node->children)
{
buildJoinClause(join_expression_dag,
child,
left_table_expression_columns_names,
right_table_expression_columns_names,
join_node,
join_clause);
}
return;
}
if (join_expressions_actions_node->function && join_expressions_actions_node->function->getName() == "equals")
{
const auto * equals_left_child = join_expressions_actions_node->children.at(0);
const auto * equals_right_child = join_expressions_actions_node->children.at(1);
auto left_equals_expression_side_optional = extractJoinTableSideFromExpression(equals_left_child,
left_table_expression_columns_names,
right_table_expression_columns_names,
join_node);
auto right_equals_expression_side_optional = extractJoinTableSideFromExpression(equals_right_child,
left_table_expression_columns_names,
right_table_expression_columns_names,
join_node);
if (!left_equals_expression_side_optional && !right_equals_expression_side_optional)
{
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"JOIN {} ON expression {} with constants is not supported",
join_node.formatASTForErrorMessage(),
join_expressions_actions_node->function->getName());
}
else if (left_equals_expression_side_optional && !right_equals_expression_side_optional)
{
join_clause.addCondition(*left_equals_expression_side_optional, join_expressions_actions_node);
}
else if (!left_equals_expression_side_optional && right_equals_expression_side_optional)
{
join_clause.addCondition(*right_equals_expression_side_optional, join_expressions_actions_node);
}
else
{
auto left_equals_expression_side = *left_equals_expression_side_optional;
auto right_equals_expression_side = *right_equals_expression_side_optional;
if (left_equals_expression_side != right_equals_expression_side)
{
const ActionsDAG::Node * left_key = equals_left_child;
const ActionsDAG::Node * right_key = equals_right_child;
if (left_equals_expression_side == JoinTableSide::Right)
{
left_key = equals_right_child;
right_key = equals_left_child;
}
join_clause.addKey(left_key, right_key);
}
else
{
join_clause.addCondition(left_equals_expression_side, join_expressions_actions_node);
}
}
return;
}
auto expression_side_optional = extractJoinTableSideFromExpression(join_expressions_actions_node,
left_table_expression_columns_names,
right_table_expression_columns_names,
join_node);
if (!expression_side_optional)
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"JOIN {} with constants is not supported",
join_node.formatASTForErrorMessage());
auto expression_side = *expression_side_optional;
join_clause.addCondition(expression_side, join_expressions_actions_node);
}
struct JoinClausesAndActions
{
JoinClauses join_clauses;
ActionsDAGPtr join_expression_actions;
ActionsDAGPtr left_join_expressions_actions;
ActionsDAGPtr right_join_expressions_actions;
};
JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName & join_expression_input_columns,
const ColumnsWithTypeAndName & left_table_expression_columns,
const ColumnsWithTypeAndName & right_table_expression_columns,
const JoinNode & join_node,
const PlannerContext & planner_context)
{
ActionsDAGPtr join_expression_actions = std::make_shared<ActionsDAG>(join_expression_input_columns);
QueryTreeActionsVisitor join_expression_visitor(join_expression_actions, planner_context);
auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(join_node.getJoinExpression());
if (join_expression_dag_node_raw_pointers.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"JOIN {} ON clause contains multiple expressions",
join_node.formatASTForErrorMessage());
const auto * join_expressions_actions_root_node = join_expression_dag_node_raw_pointers[0];
if (!join_expressions_actions_root_node->function)
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"JOIN {} join expression expected function",
join_node.formatASTForErrorMessage());
size_t left_table_expression_columns_size = left_table_expression_columns.size();
Names join_left_actions_names;
join_left_actions_names.reserve(left_table_expression_columns_size);
NameSet join_left_actions_names_set;
join_left_actions_names_set.reserve(left_table_expression_columns_size);
for (const auto & left_table_expression_column : left_table_expression_columns)
{
join_left_actions_names.push_back(left_table_expression_column.name);
join_left_actions_names_set.insert(left_table_expression_column.name);
}
size_t right_table_expression_columns_size = right_table_expression_columns.size();
Names join_right_actions_names;
join_right_actions_names.reserve(right_table_expression_columns_size);
NameSet join_right_actions_names_set;
join_right_actions_names_set.reserve(right_table_expression_columns_size);
for (const auto & right_table_expression_column : right_table_expression_columns)
{
join_right_actions_names.push_back(right_table_expression_column.name);
join_right_actions_names_set.insert(right_table_expression_column.name);
}
JoinClausesAndActions result;
result.join_expression_actions = join_expression_actions;
const auto & function_name = join_expressions_actions_root_node->function->getName();
if (function_name == "or")
{
for (const auto & child : join_expressions_actions_root_node->children)
{
result.join_clauses.emplace_back();
buildJoinClause(join_expression_actions,
child,
join_left_actions_names_set,
join_right_actions_names_set,
join_node,
result.join_clauses.back());
}
}
else
{
result.join_clauses.emplace_back();
buildJoinClause(join_expression_actions,
join_expressions_actions_root_node,
join_left_actions_names_set,
join_right_actions_names_set,
join_node,
result.join_clauses.back());
}
auto and_function = FunctionFactory::instance().get("and", planner_context.query_context);
auto add_necessary_name_if_needed = [&](JoinTableSide join_table_side, const String & name)
{
auto & necessary_names = join_table_side == JoinTableSide::Left ? join_left_actions_names : join_right_actions_names;
auto & necessary_names_set = join_table_side == JoinTableSide::Left ? join_left_actions_names_set : join_right_actions_names_set;
auto [_, inserted] = necessary_names_set.emplace(name);
if (inserted)
necessary_names.push_back(name);
};
for (auto & join_clause : result.join_clauses)
{
const auto & left_filter_condition_nodes = join_clause.getLeftFilterConditionNodes();
if (!left_filter_condition_nodes.empty())
{
const ActionsDAG::Node * dag_filter_condition_node = nullptr;
if (left_filter_condition_nodes.size() > 1)
dag_filter_condition_node = &join_expression_actions->addFunction(and_function, left_filter_condition_nodes, {});
else
dag_filter_condition_node = left_filter_condition_nodes[0];
join_clause.clearConditionNodes(JoinTableSide::Left);
join_clause.addCondition(JoinTableSide::Left, dag_filter_condition_node);
join_expression_actions->addOrReplaceInOutputs(*dag_filter_condition_node);
add_necessary_name_if_needed(JoinTableSide::Left, dag_filter_condition_node->result_name);
}
const auto & right_filter_condition_nodes = join_clause.getRightFilterConditionNodes();
if (!right_filter_condition_nodes.empty())
{
const ActionsDAG::Node * dag_filter_condition_node = nullptr;
if (right_filter_condition_nodes.size() > 1)
dag_filter_condition_node = &join_expression_actions->addFunction(and_function, right_filter_condition_nodes, {});
else
dag_filter_condition_node = right_filter_condition_nodes[0];
join_clause.clearConditionNodes(JoinTableSide::Right);
join_clause.addCondition(JoinTableSide::Right, dag_filter_condition_node);
join_expression_actions->addOrReplaceInOutputs(*dag_filter_condition_node);
add_necessary_name_if_needed(JoinTableSide::Right, dag_filter_condition_node->result_name);
}
assert(join_clause.getLeftKeyNodes().size() == join_clause.getRightKeyNodes().size());
size_t join_clause_left_key_nodes_size = join_clause.getLeftKeyNodes().size();
for (size_t i = 0; i < join_clause_left_key_nodes_size; ++i)
{
auto & left_key_node = join_clause.getLeftKeyNodes()[i];
auto & right_key_node = join_clause.getRightKeyNodes()[i];
if (!left_key_node->result_type->equals(*right_key_node->result_type))
{
DataTypePtr common_type;
try
{
common_type = getLeastSupertype(DataTypes{left_key_node->result_type, right_key_node->result_type});
}
catch (Exception & ex)
{
ex.addMessage("JOIN {} cannot infer common type in ON section for keys. Left key {} type {}. Right key {} type {}",
join_node.formatASTForErrorMessage(),
left_key_node->result_name,
left_key_node->result_type->getName(),
right_key_node->result_name,
right_key_node->result_type->getName());
}
ColumnWithTypeAndName cast_column;
cast_column.name = "__constant_" + common_type->getName();
cast_column.column = DataTypeString().createColumnConst(0, common_type->getName());
cast_column.type = std::make_shared<DataTypeString>();
const ActionsDAG::Node * cast_type_constant_node = nullptr;
if (!left_key_node->result_type->equals(*common_type))
{
cast_type_constant_node = &join_expression_actions->addColumn(cast_column);
FunctionCastBase::Diagnostic diagnostic = {left_key_node->result_name, left_key_node->result_name};
FunctionOverloadResolverPtr func_builder_cast
= CastInternalOverloadResolver<CastType::nonAccurate>::createImpl(diagnostic);
ActionsDAG::NodeRawConstPtrs children = {left_key_node, cast_type_constant_node};
left_key_node = &join_expression_actions->addFunction(func_builder_cast, std::move(children), {});
}
if (!right_key_node->result_type->equals(*common_type))
{
if (!cast_type_constant_node)
cast_type_constant_node = &join_expression_actions->addColumn(cast_column);
FunctionCastBase::Diagnostic diagnostic = {right_key_node->result_name, right_key_node->result_name};
FunctionOverloadResolverPtr func_builder_cast
= CastInternalOverloadResolver<CastType::nonAccurate>::createImpl(std::move(diagnostic));
ActionsDAG::NodeRawConstPtrs children = {right_key_node, cast_type_constant_node};
right_key_node = &join_expression_actions->addFunction(func_builder_cast, std::move(children), {});
}
}
join_expression_actions->addOrReplaceInOutputs(*left_key_node);
join_expression_actions->addOrReplaceInOutputs(*right_key_node);
add_necessary_name_if_needed(JoinTableSide::Left, left_key_node->result_name);
add_necessary_name_if_needed(JoinTableSide::Right, right_key_node->result_name);
}
}
result.left_join_expressions_actions = join_expression_actions->clone();
result.left_join_expressions_actions->removeUnusedActions(join_left_actions_names);
result.right_join_expressions_actions = join_expression_actions->clone();
result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names);
return result;
}
QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node,
SelectQueryInfo & select_query_info,
const SelectQueryOptions & select_query_options,
PlannerContext & planner_context)
{
auto & join_node = join_tree_node->as<JoinNode &>();
auto left_plan = buildQueryPlanForJoinTreeNode(join_node.getLeftTableExpression(),
select_query_info,
select_query_options,
planner_context);
auto left_plan_output_columns = left_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
auto right_plan = buildQueryPlanForJoinTreeNode(join_node.getRightTableExpression(),
select_query_info,
select_query_options,
planner_context);
auto right_plan_output_columns = right_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
if (join_node.getStrictness() == JoinStrictness::Asof)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"JOIN {} ASOF is not supported",
join_node.formatASTForErrorMessage());
JoinClausesAndActions join_clauses_and_actions;
if (join_node.isOnJoinExpression())
{
auto join_expression_input_columns = left_plan_output_columns;
join_expression_input_columns.insert(join_expression_input_columns.end(), right_plan_output_columns.begin(), right_plan_output_columns.end());
join_clauses_and_actions = buildJoinClausesAndActions(join_expression_input_columns,
left_plan_output_columns,
right_plan_output_columns,
join_node,
planner_context);
auto left_join_expressions_actions_step = std::make_unique<ExpressionStep>(left_plan.getCurrentDataStream(), join_clauses_and_actions.left_join_expressions_actions);
left_join_expressions_actions_step->setStepDescription("JOIN actions");
left_plan.addStep(std::move(left_join_expressions_actions_step));
auto right_join_expressions_actions_step = std::make_unique<ExpressionStep>(right_plan.getCurrentDataStream(), join_clauses_and_actions.right_join_expressions_actions);
right_join_expressions_actions_step->setStepDescription("JOIN actions");
right_plan.addStep(std::move(right_join_expressions_actions_step));
}
std::unordered_map<ColumnIdentifier, DataTypePtr> left_plan_column_name_to_cast_type;
std::unordered_map<ColumnIdentifier, DataTypePtr> right_plan_column_name_to_cast_type;
if (join_node.isUsingJoinExpression())
{
auto & join_node_using_columns_list = join_node.getJoinExpression()->as<ListNode &>();
for (auto & join_node_using_node : join_node_using_columns_list.getNodes())
{
auto & join_node_using_column_node = join_node_using_node->as<ColumnNode &>();
auto & inner_columns_list = join_node_using_column_node.getExpressionOrThrow()->as<ListNode &>();
auto & left_inner_column_node = inner_columns_list.getNodes().at(0);
auto & left_inner_column = left_inner_column_node->as<ColumnNode &>();
auto & right_inner_column_node = inner_columns_list.getNodes().at(1);
auto & right_inner_column = right_inner_column_node->as<ColumnNode &>();
const auto & join_node_using_column_node_type = join_node_using_column_node.getColumnType();
if (!left_inner_column.getColumnType()->equals(*join_node_using_column_node_type))
{
auto left_inner_column_identifier = planner_context.getColumnIdentifierOrThrow(left_inner_column_node.get());
left_plan_column_name_to_cast_type.emplace(left_inner_column_identifier, join_node_using_column_node_type);
}
if (!right_inner_column.getColumnType()->equals(*join_node_using_column_node_type))
{
auto right_inner_column_identifier = planner_context.getColumnIdentifierOrThrow(right_inner_column_node.get());
right_plan_column_name_to_cast_type.emplace(right_inner_column_identifier, join_node_using_column_node_type);
}
}
}
auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map<std::string, DataTypePtr> & plan_column_name_to_cast_type)
{
auto cast_actions_dag = std::make_shared<ActionsDAG>(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName());
for (auto & output_node : cast_actions_dag->getOutputs())
{
auto it = plan_column_name_to_cast_type.find(output_node->result_name);
if (it == plan_column_name_to_cast_type.end())
continue;
const auto & cast_type = it->second;
auto cast_type_name = cast_type->getName();
ColumnWithTypeAndName column;
column.name = "__constant_" + cast_type_name;
column.column = DataTypeString().createColumnConst(0, cast_type_name);
column.type = std::make_shared<DataTypeString>();
const auto * cast_type_constant_node = &cast_actions_dag->addColumn(std::move(column));
FunctionCastBase::Diagnostic diagnostic = {output_node->result_name, output_node->result_name};
FunctionOverloadResolverPtr func_builder_cast
= CastInternalOverloadResolver<CastType::nonAccurate>::createImpl(std::move(diagnostic));
ActionsDAG::NodeRawConstPtrs children = {output_node, cast_type_constant_node};
output_node = &cast_actions_dag->addFunction(func_builder_cast, std::move(children), output_node->result_name);
}
auto cast_join_columns_step
= std::make_unique<ExpressionStep>(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag));
cast_join_columns_step->setStepDescription("Cast JOIN USING columns");
plan_to_add_cast.addStep(std::move(cast_join_columns_step));
};
if (!left_plan_column_name_to_cast_type.empty())
join_cast_plan_output_nodes(left_plan, left_plan_column_name_to_cast_type);
if (!right_plan_column_name_to_cast_type.empty())
join_cast_plan_output_nodes(right_plan, right_plan_column_name_to_cast_type);
JoinKind join_kind = join_node.getKind();
bool join_use_nulls = planner_context.query_context->getSettingsRef().join_use_nulls;
auto to_nullable_function = FunctionFactory::instance().get("toNullable", planner_context.query_context);
auto join_cast_plan_columns_to_nullable = [&](QueryPlan & plan_to_add_cast)
{
auto cast_actions_dag = std::make_shared<ActionsDAG>(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName());
for (auto & output_node : cast_actions_dag->getOutputs())
{
if (output_node->type == ActionsDAG::ActionType::INPUT && output_node->result_name.starts_with("__column"))
output_node = &cast_actions_dag->addFunction(to_nullable_function, {output_node}, output_node->result_name);
}
auto cast_join_columns_step = std::make_unique<ExpressionStep>(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag));
cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable");
plan_to_add_cast.addStep(std::move(cast_join_columns_step));
};
if (join_use_nulls)
{
if (isFull(join_kind))
{
join_cast_plan_columns_to_nullable(left_plan);
join_cast_plan_columns_to_nullable(right_plan);
}
else if (isLeft(join_kind))
{
join_cast_plan_columns_to_nullable(right_plan);
}
else if (isRight(join_kind))
{
join_cast_plan_columns_to_nullable(left_plan);
}
}
auto table_join = std::make_shared<TableJoin>();
table_join->getTableJoin() = join_node.toASTTableJoin()->as<ASTTableJoin &>();
if (join_node.getKind() == JoinKind::Comma)
table_join->getTableJoin().kind = JoinKind::Cross;
table_join->getTableJoin().strictness = JoinStrictness::All;
if (join_node.isOnJoinExpression())
{
const auto & join_clauses = join_clauses_and_actions.join_clauses;
auto & table_join_clauses = table_join->getClauses();
for (const auto & join_clause : join_clauses)
{
table_join_clauses.emplace_back();
auto & table_join_clause = table_join_clauses.back();
const auto & join_clause_left_key_nodes = join_clause.getLeftKeyNodes();
const auto & join_clause_right_key_nodes = join_clause.getRightKeyNodes();
size_t join_clause_key_nodes_size = join_clause_left_key_nodes.size();
assert(join_clause_key_nodes_size == join_clause_right_key_nodes.size());
for (size_t i = 0; i < join_clause_key_nodes_size; ++i)
{
table_join_clause.key_names_left.push_back(join_clause_left_key_nodes[i]->result_name);
table_join_clause.key_names_right.push_back(join_clause_right_key_nodes[i]->result_name);
}
const auto & join_clause_get_left_filter_condition_nodes = join_clause.getLeftFilterConditionNodes();
if (!join_clause_get_left_filter_condition_nodes.empty())
{
if (join_clause_get_left_filter_condition_nodes.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"JOIN {} left filter conditions size must be 1. Actual {}",
join_node.formatASTForErrorMessage(),
join_clause_get_left_filter_condition_nodes.size());
const auto & join_clause_left_filter_condition_name = join_clause_get_left_filter_condition_nodes[0]->result_name;
table_join_clause.analyzer_left_filter_condition_column_name = join_clause_left_filter_condition_name;
}
const auto & join_clause_get_right_filter_condition_nodes = join_clause.getRightFilterConditionNodes();
if (!join_clause_get_right_filter_condition_nodes.empty())
{
if (join_clause_get_right_filter_condition_nodes.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"JOIN {} right filter conditions size must be 1. Actual {}",
join_node.formatASTForErrorMessage(),
join_clause_get_right_filter_condition_nodes.size());
const auto & join_clause_right_filter_condition_name = join_clause_get_right_filter_condition_nodes[0]->result_name;
table_join_clause.analyzer_right_filter_condition_column_name = join_clause_right_filter_condition_name;
}
}
}
else if (join_node.isUsingJoinExpression())
{
auto & table_join_clauses = table_join->getClauses();
table_join_clauses.emplace_back();
auto & table_join_clause = table_join_clauses.back();
auto & using_list = join_node.getJoinExpression()->as<ListNode &>();
for (auto & join_using_node : using_list.getNodes())
{
auto & join_using_column_node = join_using_node->as<ColumnNode &>();
if (!join_using_column_node.getExpression() ||
join_using_column_node.getExpression()->getNodeType() != QueryTreeNodeType::LIST)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"JOIN {} column in USING does not have inner columns",
join_node.formatASTForErrorMessage());
auto & using_join_columns_list = join_using_column_node.getExpression()->as<ListNode &>();
auto & using_join_left_join_column_node = using_join_columns_list.getNodes().at(0);
auto & using_join_right_join_column_node = using_join_columns_list.getNodes().at(1);
auto left_column_identifier_it = planner_context.column_node_to_column_identifier.find(using_join_left_join_column_node.get());
auto right_column_identifier_it = planner_context.column_node_to_column_identifier.find(using_join_right_join_column_node.get());
table_join_clause.key_names_left.push_back(left_column_identifier_it->second);
table_join_clause.key_names_right.push_back(right_column_identifier_it->second);
}
}
auto left_table_names = left_plan.getCurrentDataStream().header.getNames();
NameSet left_table_names_set(left_table_names.begin(), left_table_names.end());
auto columns_from_joined_table = right_plan.getCurrentDataStream().header.getNamesAndTypesList();
table_join->setColumnsFromJoinedTable(columns_from_joined_table, left_table_names_set, "");
for (auto & column_from_joined_table : columns_from_joined_table)
{
if (column_from_joined_table.name.starts_with("__column"))
table_join->addJoinedColumn(column_from_joined_table);
}
size_t max_block_size = planner_context.query_context->getSettingsRef().max_block_size;
size_t max_streams = planner_context.query_context->getSettingsRef().max_threads;
JoinPtr join_ptr = std::make_shared<HashJoin>(table_join, right_plan.getCurrentDataStream().header, false /*any_take_last_row*/);
QueryPlanStepPtr join_step = std::make_unique<JoinStep>(
left_plan.getCurrentDataStream(),
right_plan.getCurrentDataStream(),
join_ptr,
max_block_size,
max_streams,
false /*optimize_read_in_order*/);
join_step->setStepDescription(fmt::format("JOIN {}", JoinPipelineType::FillRightFirst));
std::vector<QueryPlanPtr> plans;
plans.emplace_back(std::make_unique<QueryPlan>(std::move(left_plan)));
plans.emplace_back(std::make_unique<QueryPlan>(std::move(right_plan)));
auto result_plan = QueryPlan();
result_plan.unitePlans(std::move(join_step), {std::move(plans)});
return result_plan;
}
QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression,
SelectQueryInfo & select_query_info,
const SelectQueryOptions & select_query_options,
PlannerContext & planner_context)
{
auto & array_join_node = table_expression->as<ArrayJoinNode &>();
auto left_plan = buildQueryPlanForJoinTreeNode(array_join_node.getTableExpression(),
select_query_info,
select_query_options,
planner_context);
auto left_plan_output_columns = left_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
ActionsDAGPtr array_join_action_dag = std::make_shared<ActionsDAG>(left_plan_output_columns);
QueryTreeActionsVisitor actions_visitor(array_join_action_dag, planner_context);
NameSet array_join_columns;
for (auto & array_join_expression : array_join_node.getJoinExpressions().getNodes())
{
auto & array_join_expression_column = array_join_expression->as<ColumnNode &>();
const auto & array_join_column_name = array_join_expression_column.getColumnName();
array_join_columns.insert(array_join_column_name);
auto expression_dag_index_nodes = actions_visitor.visit(array_join_expression_column.getExpressionOrThrow());
for (auto & expression_dag_index_node : expression_dag_index_nodes)
{
const auto * array_join_column_node = &array_join_action_dag->addAlias(*expression_dag_index_node, array_join_column_name);
array_join_action_dag->getOutputs().push_back(array_join_column_node);
}
}
auto array_join_actions = std::make_unique<ExpressionStep>(left_plan.getCurrentDataStream(), array_join_action_dag);
array_join_actions->setStepDescription("ARRAY JOIN actions");
left_plan.addStep(std::move(array_join_actions));
auto array_join_action = std::make_shared<ArrayJoinAction>(array_join_columns, array_join_node.isLeft(), planner_context.query_context);
auto array_join_step = std::make_unique<ArrayJoinStep>(left_plan.getCurrentDataStream(), std::move(array_join_action));
array_join_step->setStepDescription("ARRAY JOIN");
left_plan.addStep(std::move(array_join_step));
return left_plan;
}
QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node,
SelectQueryInfo & select_query_info,
const SelectQueryOptions & select_query_options,
PlannerContext & planner_context)
{
auto join_tree_node_type = join_tree_node->getNodeType();
switch (join_tree_node_type)
{
case QueryTreeNodeType::QUERY:
[[fallthrough]];
case QueryTreeNodeType::UNION:
[[fallthrough]];
case QueryTreeNodeType::TABLE:
[[fallthrough]];
case QueryTreeNodeType::TABLE_FUNCTION:
{
SelectQueryInfo table_expression_query_info = select_query_info;
return buildQueryPlanForTableExpression(join_tree_node, table_expression_query_info, select_query_options, planner_context);
}
case QueryTreeNodeType::JOIN:
{
return buildQueryPlanForJoinNode(join_tree_node, select_query_info, select_query_options, planner_context);
}
case QueryTreeNodeType::ARRAY_JOIN:
{
return buildQueryPlanForArrayJoinNode(join_tree_node, select_query_info, select_query_options, planner_context);
}
default:
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected query, table, table function, join or array join query node. Actual {}",
join_tree_node->formatASTForErrorMessage());
}
}
}
}
InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer(
const ASTPtr & query_,
const SelectQueryOptions & select_query_options_,
ContextPtr context_)
: WithContext(context_)
, query(query_)
, select_query_options(select_query_options_)
{
if (query->as<ASTSelectWithUnionQuery>() || query->as<ASTSelectQuery>())
{
}
else if (auto * subquery = query->as<ASTSubquery>())
{
query = subquery->children[0];
}
else
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Expected ASTSelectWithUnionQuery or ASTSelectQuery. Actual {}",
query->formatForErrorMessage());
}
query_tree = buildQueryTree(query, context_);
QueryTreePassManager query_tree_pass_manager(context_);
addQueryTreePasses(query_tree_pass_manager);
query_tree_pass_manager.run(query_tree);
}
InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer(
const QueryTreeNodePtr & query_tree_,
const SelectQueryOptions & select_query_options_,
ContextPtr context_)
: WithContext(context_)
, query(query_tree_->toAST())
, query_tree(query_tree_)
, select_query_options(select_query_options_)
{
if (query_tree->getNodeType() != QueryTreeNodeType::QUERY &&
query_tree->getNodeType() != QueryTreeNodeType::UNION)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Expected QUERY or UNION node. Actual {}",
query_tree->formatASTForErrorMessage());
}
Block InterpreterSelectQueryAnalyzer::getSampleBlock()
{
initializeQueryPlanIfNeeded();
return query_plan.getCurrentDataStream().header;
}
BlockIO InterpreterSelectQueryAnalyzer::execute()
{
initializeQueryPlanIfNeeded();
QueryPlanOptimizationSettings optimization_settings;
BuildQueryPipelineSettings build_pipeline_settings;
auto pipeline_builder = query_plan.buildQueryPipeline(optimization_settings, build_pipeline_settings);
BlockIO res;
res.pipeline = QueryPipelineBuilder::getPipeline(std::move(*pipeline_builder));
return res;
}
void InterpreterSelectQueryAnalyzer::initializeQueryPlanIfNeeded()
{
if (query_plan.isInitialized())
return;
auto current_context = getContext();
if (auto * union_query_tree = query_tree->as<UnionNode>())
{
auto union_mode = union_query_tree->getUnionMode();
if (union_mode == SelectUnionMode::Unspecified)
throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION mode must be initialized");
std::vector<std::unique_ptr<QueryPlan>> query_plans;
Blocks query_plans_headers;
for (auto & query_node : union_query_tree->getQueries().getNodes())
{
InterpreterSelectQueryAnalyzer interpeter(query_node, select_query_options, current_context);
interpeter.initializeQueryPlanIfNeeded();
auto query_node_plan = std::make_unique<QueryPlan>(std::move(interpeter).extractQueryPlan());
query_plans_headers.push_back(query_node_plan->getCurrentDataStream().header);
query_plans.push_back(std::move(query_node_plan));
}
Block union_common_header = getCommonHeaderForUnion(query_plans_headers);
DataStreams query_plans_streams;
query_plans_streams.reserve(query_plans.size());
for (auto & query_node_plan : query_plans)
{
if (blocksHaveEqualStructure(query_node_plan->getCurrentDataStream().header, union_common_header))
continue;
auto actions_dag = ActionsDAG::makeConvertingActions(
query_node_plan->getCurrentDataStream().header.getColumnsWithTypeAndName(),
union_common_header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Position);
auto converting_step = std::make_unique<ExpressionStep>(query_node_plan->getCurrentDataStream(), std::move(actions_dag));
converting_step->setStepDescription("Conversion before UNION");
query_node_plan->addStep(std::move(converting_step));
query_plans_streams.push_back(query_node_plan->getCurrentDataStream());
}
const auto & settings = current_context->getSettingsRef();
auto max_threads = settings.max_threads;
if (union_mode == SelectUnionMode::ALL || union_mode == SelectUnionMode::DISTINCT)
{
auto union_step = std::make_unique<UnionStep>(std::move(query_plans_streams), max_threads);
query_plan.unitePlans(std::move(union_step), std::move(query_plans));
if (union_query_tree->getUnionMode() == SelectUnionMode::DISTINCT)
{
/// Add distinct transform
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
auto distinct_step = std::make_unique<DistinctStep>(
query_plan.getCurrentDataStream(),
limits,
0 /*limit hint*/,
query_plan.getCurrentDataStream().header.getNames(),
false /*pre distinct*/,
settings.optimize_distinct_in_order);
query_plan.addStep(std::move(distinct_step));
}
}
else if (union_mode == SelectUnionMode::INTERSECT || union_mode == SelectUnionMode::EXCEPT)
{
IntersectOrExceptStep::Operator intersect_or_except_operator = IntersectOrExceptStep::Operator::INTERSECT;
if (union_mode == SelectUnionMode::EXCEPT)
intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT;
auto union_step = std::make_unique<IntersectOrExceptStep>(std::move(query_plans_streams), intersect_or_except_operator, max_threads);
query_plan.unitePlans(std::move(union_step), std::move(query_plans));
}
return;
}
auto & query_node = query_tree->as<QueryNode &>();
SelectQueryInfo select_query_info;
select_query_info.original_query = query;
select_query_info.query = query;
PlannerContext planner_context;
planner_context.query_context = getContext();
CollectTableExpressionIdentifiersVisitor collect_table_expression_identifiers_visitor;
collect_table_expression_identifiers_visitor.visit(query_node.getJoinTree(), planner_context);
CollectSourceColumnsVisitor::Data data {planner_context};
CollectSourceColumnsVisitor collect_source_columns_visitor(data);
collect_source_columns_visitor.visit(query_tree);
query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, planner_context);
std::optional<std::vector<size_t>> action_chain_node_parent_indices;
if (query_node.hasWhere())
{
ColumnsWithTypeAndName where_input;
if (action_chain_node_parent_indices)
planner_context.actions_chain.getAvailableOutputColumns(*action_chain_node_parent_indices);
else
where_input = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
planner_context.where_actions = convertExpressionNodeIntoDAG(query_node.getWhere(), where_input, planner_context);
planner_context.where_action_node_name = planner_context.where_actions->getOutputs().at(0)->result_name;
auto where_actions_node = std::make_unique<ActionsChainNode>(planner_context.where_actions);
if (action_chain_node_parent_indices)
where_actions_node->addParentIndices(*action_chain_node_parent_indices);
planner_context.actions_chain.addNode(std::move(where_actions_node));
action_chain_node_parent_indices = {planner_context.actions_chain.getLastNodeIndex()};
planner_context.where_actions_chain_node_index = planner_context.actions_chain.size();
}
ColumnsWithTypeAndName projection_input;
if (action_chain_node_parent_indices)
planner_context.actions_chain.getAvailableOutputColumns(*action_chain_node_parent_indices);
else
projection_input = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
planner_context.projection_actions = convertExpressionNodeIntoDAG(query_node.getProjectionNode(), projection_input, planner_context);
auto projection_actions_node = std::make_unique<ActionsChainNode>(planner_context.projection_actions);
if (action_chain_node_parent_indices)
projection_actions_node->addParentIndices(*action_chain_node_parent_indices);
planner_context.actions_chain.addNode(std::move(projection_actions_node));
const auto & projection_action_dag_nodes = planner_context.projection_actions->getOutputs();
size_t projection_action_dag_nodes_size = projection_action_dag_nodes.size();
auto & projection_nodes = query_node.getProjection().getNodes();
size_t projection_nodes_size = projection_nodes.size();
if (projection_nodes_size != projection_action_dag_nodes_size)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"QueryTree projection nodes size mismatch. Expected {}. Actual {}",
projection_action_dag_nodes_size,
projection_nodes_size);
NamesWithAliases projection_names;
for (size_t i = 0; i < projection_nodes_size; ++i)
{
auto & node = projection_nodes[i];
auto node_name = node->getName();
const auto * action_dag_node = projection_action_dag_nodes[i];
const auto & actions_dag_node_name = action_dag_node->result_name;
if (node->hasAlias())
projection_names.push_back({actions_dag_node_name, node->getAlias()});
else
projection_names.push_back({actions_dag_node_name, node_name});
}
planner_context.projection_actions->project(projection_names);
// std::cout << "Chain dump before finalize" << std::endl;
// std::cout << planner_context.actions_chain.dump() << std::endl;
planner_context.actions_chain.finalize();
// std::cout << "Chain dump after finalize" << std::endl;
// std::cout << planner_context.actions_chain.dump() << std::endl;
if (query_node.hasWhere())
{
auto & where_actions_chain_node = planner_context.actions_chain.at(planner_context.where_actions_chain_node_index);
bool remove_filter = !where_actions_chain_node->getChildRequiredOutputColumnsNames().contains(planner_context.where_action_node_name);
auto where_step = std::make_unique<FilterStep>(query_plan.getCurrentDataStream(),
planner_context.where_actions,
planner_context.where_action_node_name,
remove_filter);
where_step->setStepDescription("WHERE");
query_plan.addStep(std::move(where_step));
}
// std::cout << "Query plan dump" << std::endl;
// std::cout << dumpQueryPlan(query_plan) << std::endl;
auto projection_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), planner_context.projection_actions);
projection_step->setStepDescription("Projection");
query_plan.addStep(std::move(projection_step));
}
}