mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-19 12:52:37 +00:00
1071 lines
44 KiB
C++
1071 lines
44 KiB
C++
#include <Planner/PlannerActionsVisitor.h>
|
|
|
|
#include <AggregateFunctions/WindowFunction.h>
|
|
#include <Analyzer/Utils.h>
|
|
#include <Analyzer/SetUtils.h>
|
|
#include <Analyzer/ConstantNode.h>
|
|
#include <Analyzer/FunctionNode.h>
|
|
#include <Analyzer/ColumnNode.h>
|
|
#include <Analyzer/LambdaNode.h>
|
|
#include <Analyzer/SortNode.h>
|
|
#include <Analyzer/WindowNode.h>
|
|
#include <Analyzer/QueryNode.h>
|
|
|
|
#include <DataTypes/FieldToDataType.h>
|
|
#include <DataTypes/DataTypeSet.h>
|
|
|
|
#include <Common/FieldVisitorToString.h>
|
|
#include <DataTypes/DataTypeTuple.h>
|
|
|
|
#include <Columns/ColumnSet.h>
|
|
#include <Columns/ColumnConst.h>
|
|
|
|
#include <Functions/FunctionsMiscellaneous.h>
|
|
#include <Functions/FunctionFactory.h>
|
|
#include <Functions/indexHint.h>
|
|
|
|
#include <Interpreters/ExpressionActions.h>
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Planner/PlannerContext.h>
|
|
#include <Planner/TableExpressionData.h>
|
|
#include <Planner/Utils.h>
|
|
|
|
#include <Core/Settings.h>
|
|
|
|
|
|
namespace DB
|
|
{
|
|
/// Settings consulted in this file. They are only declared here (`extern`); the definitions live elsewhere.
namespace Setting
|
|
{
|
|
/// Checked when naming a `tuple` function node: if enabled and the result tuple type
/// has explicit element names, the action name is built from those element names.
extern const SettingsBool enable_named_columns_in_function_tuple;
|
|
/// Passed to Set::getElementTypes when looking up the prepared set for an IN function.
extern const SettingsBool transform_null_in;
|
|
}
|
|
|
|
/// Error codes thrown from this file. They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
|
|
{
|
|
/// Thrown by visitImpl for a query tree node that is not a column, constant or function.
extern const int UNSUPPORTED_METHOD;
|
|
/// Thrown for internal inconsistencies (unknown DAG node name, unresolved lambda, missing set, ...).
extern const int LOGICAL_ERROR;
|
|
/// Thrown when the internal function `__actionName` is called with unexpected arguments.
extern const int BAD_ARGUMENTS;
|
|
/// Thrown when the lambda nesting level exceeds 64.
extern const int INCORRECT_QUERY;
|
|
}
|
|
|
|
namespace
|
|
{
|
|
|
|
/* Calculates Action node name for ConstantNode.
|
|
*
|
|
* If converting to AST will add a '_CAST' function call,
|
|
* the result action name will also include it.
|
|
*/
|
|
String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_node)
|
|
{
|
|
WriteBufferFromOwnString buffer;
|
|
if (constant_node.requiresCastCall())
|
|
buffer << "_CAST(";
|
|
|
|
buffer << calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType());
|
|
|
|
if (constant_node.requiresCastCall())
|
|
{
|
|
/// Projection name for constants is <value>_<type> so for _cast(1, 'String') we will have _cast(1_Uint8, 'String'_String)
|
|
buffer << ", '" << constant_node.getResultType()->getName() << "'_String)";
|
|
}
|
|
|
|
return buffer.str();
|
|
}
|
|
|
|
class ActionNodeNameHelper
|
|
{
|
|
public:
|
|
ActionNodeNameHelper(QueryTreeNodeToName & node_to_name_,
|
|
const PlannerContext & planner_context_,
|
|
bool use_column_identifier_as_action_node_name_)
|
|
: node_to_name(node_to_name_)
|
|
, planner_context(planner_context_)
|
|
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
|
|
{
|
|
}
|
|
|
|
String calculateActionNodeName(const QueryTreeNodePtr & node)
|
|
{
|
|
auto it = node_to_name.find(node);
|
|
if (it != node_to_name.end())
|
|
return it->second;
|
|
|
|
String result;
|
|
auto node_type = node->getNodeType();
|
|
|
|
switch (node_type)
|
|
{
|
|
case QueryTreeNodeType::COLUMN:
|
|
{
|
|
const ColumnIdentifier * column_identifier = nullptr;
|
|
if (use_column_identifier_as_action_node_name)
|
|
column_identifier = planner_context.getColumnNodeIdentifierOrNull(node);
|
|
|
|
if (column_identifier)
|
|
{
|
|
result = *column_identifier;
|
|
}
|
|
else
|
|
{
|
|
const auto & column_node = node->as<ColumnNode &>();
|
|
result = column_node.getColumnName();
|
|
}
|
|
|
|
break;
|
|
}
|
|
case QueryTreeNodeType::CONSTANT:
|
|
{
|
|
const auto & constant_node = node->as<ConstantNode &>();
|
|
/* To ensure that headers match during distributed query we need to simulate action node naming on
|
|
* secondary servers. If we don't do that headers will mismatch due to constant folding.
|
|
*
|
|
* +--------+
|
|
* -----------------| Server |----------------
|
|
* / +--------+ \
|
|
* / \
|
|
* v v
|
|
* +-----------+ +-----------+
|
|
* | Initiator | ------ | Secondary |------
|
|
* +-----------+ / +-----------+ \
|
|
* | / \
|
|
* | / \
|
|
* v / \
|
|
* +---------------+ v v
|
|
* | Wrap in _CAST | +----------------------------+ +----------------------+
|
|
* | if needed | | Constant folded from _CAST | | Constant folded from |
|
|
* +---------------+ +----------------------------+ | another expression |
|
|
* | +----------------------+
|
|
* v |
|
|
* +----------------------------+ v
|
|
* | Name ConstantNode the same | +--------------------------+
|
|
* | as on initiator server | | Generate action name for |
|
|
* | (wrap in _CAST if needed) | | original expression |
|
|
* +----------------------------+ +--------------------------+
|
|
*/
|
|
if (planner_context.isASTLevelOptimizationAllowed())
|
|
{
|
|
result = calculateActionNodeNameWithCastIfNeeded(constant_node);
|
|
}
|
|
else
|
|
{
|
|
// Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed.
|
|
if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY)
|
|
{
|
|
if (constant_node.receivedFromInitiatorServer())
|
|
result = calculateActionNodeNameWithCastIfNeeded(constant_node);
|
|
else
|
|
result = calculateActionNodeName(constant_node.getSourceExpression());
|
|
}
|
|
else
|
|
result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType());
|
|
}
|
|
break;
|
|
}
|
|
case QueryTreeNodeType::FUNCTION:
|
|
{
|
|
const auto & function_node = node->as<FunctionNode &>();
|
|
if (function_node.getFunctionName() == "__actionName")
|
|
{
|
|
/// Perform sanity check, because user may call this function with unexpected arguments
|
|
const auto & function_argument_nodes = function_node.getArguments().getNodes();
|
|
if (function_argument_nodes.size() == 2)
|
|
{
|
|
if (const auto * second_argument = function_argument_nodes.at(1)->as<ConstantNode>())
|
|
result = toString(second_argument->getValue());
|
|
}
|
|
|
|
/// Empty node name is not allowed and leads to logical errors
|
|
if (result.empty())
|
|
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function __actionName is internal nad should not be used directly");
|
|
break;
|
|
}
|
|
|
|
if (planner_context.getQueryContext()->getSettingsRef()[Setting::enable_named_columns_in_function_tuple])
|
|
{
|
|
/// Function "tuple" which generates named tuple should use argument aliases to construct its name.
|
|
if (function_node.getFunctionName() == "tuple")
|
|
{
|
|
if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(function_node.getResultType().get()))
|
|
{
|
|
if (type_tuple->haveExplicitNames())
|
|
{
|
|
const auto & names = type_tuple->getElementNames();
|
|
size_t size = names.size();
|
|
WriteBufferFromOwnString s;
|
|
s << "tuple(";
|
|
for (size_t i = 0; i < size; ++i)
|
|
{
|
|
if (i != 0)
|
|
s << ", ";
|
|
s << backQuoteIfNeed(names[i]);
|
|
}
|
|
s << ")";
|
|
result = s.str();
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
String in_function_second_argument_node_name;
|
|
|
|
if (isNameOfInFunction(function_node.getFunctionName()))
|
|
{
|
|
const auto & in_first_argument_node = function_node.getArguments().getNodes().at(0);
|
|
const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1);
|
|
in_function_second_argument_node_name = PlannerContext::createSetKey(in_first_argument_node->getResultType(), in_second_argument_node);
|
|
}
|
|
|
|
WriteBufferFromOwnString buffer;
|
|
buffer << function_node.getFunctionName();
|
|
|
|
const auto & function_parameters_nodes = function_node.getParameters().getNodes();
|
|
|
|
if (!function_parameters_nodes.empty())
|
|
{
|
|
buffer << '(';
|
|
|
|
size_t function_parameters_nodes_size = function_parameters_nodes.size();
|
|
for (size_t i = 0; i < function_parameters_nodes_size; ++i)
|
|
{
|
|
const auto & function_parameter_node = function_parameters_nodes[i];
|
|
buffer << calculateActionNodeName(function_parameter_node);
|
|
|
|
if (i + 1 != function_parameters_nodes_size)
|
|
buffer << ", ";
|
|
}
|
|
|
|
buffer << ')';
|
|
}
|
|
|
|
const auto & function_arguments_nodes = function_node.getArguments().getNodes();
|
|
String function_argument_name;
|
|
|
|
buffer << '(';
|
|
|
|
size_t function_arguments_nodes_size = function_arguments_nodes.size();
|
|
for (size_t i = 0; i < function_arguments_nodes_size; ++i)
|
|
{
|
|
if (i == 1 && !in_function_second_argument_node_name.empty())
|
|
{
|
|
function_argument_name = in_function_second_argument_node_name;
|
|
}
|
|
else
|
|
{
|
|
const auto & function_argument_node = function_arguments_nodes[i];
|
|
function_argument_name = calculateActionNodeName(function_argument_node);
|
|
}
|
|
|
|
buffer << function_argument_name;
|
|
|
|
if (i + 1 != function_arguments_nodes_size)
|
|
buffer << ", ";
|
|
}
|
|
|
|
buffer << ')';
|
|
|
|
if (function_node.isWindowFunction())
|
|
{
|
|
buffer << " OVER (";
|
|
buffer << calculateWindowNodeActionName(node, function_node.getWindowNode());
|
|
buffer << ')';
|
|
}
|
|
|
|
result = buffer.str();
|
|
break;
|
|
}
|
|
case QueryTreeNodeType::LAMBDA:
|
|
{
|
|
/// Initially, the action name was `"__lambda_" + toString(node->getTreeHash());`.
|
|
/// This is not a good idea because:
|
|
/// * hash is different on initiator and shard if the default database is changed in cluster
|
|
/// * hash is reliable only within one node; any change will break queries in between versions
|
|
///
|
|
/// Now, we calculate execution name as (names + types) for lambda arguments + action name (expression)
|
|
/// and this should be more reliable (as long as we trust the calculation of action name for functions)
|
|
|
|
WriteBufferFromOwnString buffer;
|
|
|
|
const auto & lambda_node = node->as<LambdaNode &>();
|
|
const auto & lambda_arguments_nodes = lambda_node.getArguments().getNodes();
|
|
|
|
size_t lambda_arguments_nodes_size = lambda_arguments_nodes.size();
|
|
for (size_t i = 0; i < lambda_arguments_nodes_size; ++i)
|
|
{
|
|
const auto & lambda_argument_node = lambda_arguments_nodes[i];
|
|
buffer << calculateActionNodeName(lambda_argument_node);
|
|
buffer << ' ';
|
|
buffer << lambda_argument_node->as<ColumnNode &>().getResultType()->getName();
|
|
|
|
if (i + 1 != lambda_arguments_nodes_size)
|
|
buffer << ", ";
|
|
}
|
|
|
|
buffer << " -> " << calculateActionNodeName(lambda_node.getExpression());
|
|
|
|
result = buffer.str();
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid action query tree node {}", node->formatASTForErrorMessage());
|
|
}
|
|
}
|
|
|
|
node_to_name.emplace(node, result);
|
|
|
|
return result;
|
|
}
|
|
|
|
static String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type)
|
|
{
|
|
auto constant_name = applyVisitor(FieldVisitorToString(), constant_literal);
|
|
return constant_name + "_" + constant_type->getName();
|
|
}
|
|
|
|
static String calculateConstantActionNodeName(const Field & constant_literal)
|
|
{
|
|
return calculateConstantActionNodeName(constant_literal, applyVisitor(FieldToDataType(), constant_literal));
|
|
}
|
|
|
|
String calculateWindowNodeActionName(const QueryTreeNodePtr & function_nodew_node_, const QueryTreeNodePtr & window_node_)
|
|
{
|
|
const auto & function_node = function_nodew_node_->as<const FunctionNode&>();
|
|
const auto & window_node = window_node_->as<const WindowNode &>();
|
|
WriteBufferFromOwnString buffer;
|
|
|
|
if (window_node.hasPartitionBy())
|
|
{
|
|
buffer << "PARTITION BY ";
|
|
|
|
const auto & partition_by_nodes = window_node.getPartitionBy().getNodes();
|
|
size_t partition_by_nodes_size = partition_by_nodes.size();
|
|
|
|
for (size_t i = 0; i < partition_by_nodes_size; ++i)
|
|
{
|
|
const auto & partition_by_node = partition_by_nodes[i];
|
|
buffer << calculateActionNodeName(partition_by_node);
|
|
if (i + 1 != partition_by_nodes_size)
|
|
buffer << ", ";
|
|
}
|
|
}
|
|
|
|
if (window_node.hasOrderBy())
|
|
{
|
|
if (window_node.hasPartitionBy())
|
|
buffer << ' ';
|
|
|
|
buffer << "ORDER BY ";
|
|
|
|
const auto & order_by_nodes = window_node.getOrderBy().getNodes();
|
|
size_t order_by_nodes_size = order_by_nodes.size();
|
|
|
|
for (size_t i = 0; i < order_by_nodes_size; ++i)
|
|
{
|
|
auto & sort_node = order_by_nodes[i]->as<SortNode &>();
|
|
buffer << calculateActionNodeName(sort_node.getExpression());
|
|
|
|
auto sort_direction = sort_node.getSortDirection();
|
|
buffer << (sort_direction == SortDirection::ASCENDING ? " ASC" : " DESC");
|
|
|
|
auto nulls_sort_direction = sort_node.getNullsSortDirection();
|
|
|
|
if (nulls_sort_direction)
|
|
buffer << " NULLS " << (nulls_sort_direction == sort_direction ? "LAST" : "FIRST");
|
|
|
|
if (auto collator = sort_node.getCollator())
|
|
buffer << " COLLATE " << collator->getLocale();
|
|
|
|
if (sort_node.withFill())
|
|
{
|
|
buffer << " WITH FILL";
|
|
|
|
if (sort_node.hasFillFrom())
|
|
buffer << " FROM " << calculateActionNodeName(sort_node.getFillFrom());
|
|
|
|
if (sort_node.hasFillTo())
|
|
buffer << " TO " << calculateActionNodeName(sort_node.getFillTo());
|
|
|
|
if (sort_node.hasFillStep())
|
|
buffer << " STEP " << calculateActionNodeName(sort_node.getFillStep());
|
|
|
|
if (sort_node.hasFillStaleness())
|
|
buffer << " STALENESS " << calculateActionNodeName(sort_node.getFillStaleness());
|
|
}
|
|
|
|
if (i + 1 != order_by_nodes_size)
|
|
buffer << ", ";
|
|
}
|
|
}
|
|
|
|
auto window_frame_opt = extractWindowFrame(function_node);
|
|
if (window_frame_opt)
|
|
{
|
|
auto & window_frame = *window_frame_opt;
|
|
if (window_node.hasPartitionBy() || window_node.hasOrderBy())
|
|
buffer << ' ';
|
|
|
|
window_frame.toString(buffer);
|
|
}
|
|
|
|
return buffer.str();
|
|
}
|
|
private:
|
|
std::unordered_map<QueryTreeNodePtr, std::string> & node_to_name;
|
|
const PlannerContext & planner_context;
|
|
bool use_column_identifier_as_action_node_name = true;
|
|
};
|
|
|
|
class ActionsScopeNode
|
|
{
|
|
public:
|
|
explicit ActionsScopeNode(ActionsDAG & actions_dag_, QueryTreeNodePtr scope_node_)
|
|
: actions_dag(actions_dag_)
|
|
, scope_node(std::move(scope_node_))
|
|
{
|
|
for (const auto & node : actions_dag.getNodes())
|
|
node_name_to_node[node.result_name] = &node;
|
|
}
|
|
|
|
const QueryTreeNodePtr & getScopeNode() const
|
|
{
|
|
return scope_node;
|
|
}
|
|
|
|
[[maybe_unused]] bool containsNode(const std::string & node_name)
|
|
{
|
|
return node_name_to_node.find(node_name) != node_name_to_node.end();
|
|
}
|
|
|
|
[[maybe_unused]] bool containsInputNode(const std::string & node_name)
|
|
{
|
|
const auto * node = tryGetNode(node_name);
|
|
if (node && node->type == ActionsDAG::ActionType::INPUT)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
[[maybe_unused]] const ActionsDAG::Node * tryGetNode(const std::string & node_name)
|
|
{
|
|
auto it = node_name_to_node.find(node_name);
|
|
if (it == node_name_to_node.end())
|
|
return {};
|
|
|
|
return it->second;
|
|
}
|
|
|
|
const ActionsDAG::Node * getNodeOrThrow(const std::string & node_name)
|
|
{
|
|
auto it = node_name_to_node.find(node_name);
|
|
if (it == node_name_to_node.end())
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
|
"No node with name {}. There are only nodes {}",
|
|
node_name,
|
|
actions_dag.dumpNames());
|
|
|
|
return it->second;
|
|
}
|
|
|
|
const ActionsDAG::Node * addInputColumnIfNecessary(const std::string & node_name, const DataTypePtr & column_type)
|
|
{
|
|
auto it = node_name_to_node.find(node_name);
|
|
if (it != node_name_to_node.end())
|
|
return it->second;
|
|
|
|
const auto * node = &actions_dag.addInput(node_name, column_type);
|
|
node_name_to_node[node->result_name] = node;
|
|
|
|
return node;
|
|
}
|
|
|
|
const ActionsDAG::Node * addInputConstantColumnIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column)
|
|
{
|
|
auto it = node_name_to_node.find(node_name);
|
|
if (it != node_name_to_node.end())
|
|
return it->second;
|
|
|
|
const auto * node = &actions_dag.addInput(column);
|
|
node_name_to_node[node->result_name] = node;
|
|
|
|
return node;
|
|
}
|
|
|
|
const ActionsDAG::Node * addConstantIfNecessary(const std::string & node_name, const ColumnWithTypeAndName & column)
|
|
{
|
|
auto it = node_name_to_node.find(node_name);
|
|
if (it != node_name_to_node.end())
|
|
{
|
|
/// It is possible that ActionsDAG already has an input with the same name as constant.
|
|
/// In this case, prefer constant to input.
|
|
/// Constatns affect function return type, which should be consistent with QueryTree.
|
|
/// Query example:
|
|
/// SELECT materialize(toLowCardinality('b')) || 'a' FROM remote('127.0.0.{1,2}', system, one) GROUP BY 'a'
|
|
bool materialized_input = it->second->type == ActionsDAG::ActionType::INPUT && !it->second->column;
|
|
if (!materialized_input)
|
|
return it->second;
|
|
}
|
|
|
|
const auto * node = &actions_dag.addColumn(column);
|
|
node_name_to_node[node->result_name] = node;
|
|
|
|
return node;
|
|
}
|
|
|
|
template <typename FunctionOrOverloadResolver>
|
|
const ActionsDAG::Node * addFunctionIfNecessary(const std::string & node_name, ActionsDAG::NodeRawConstPtrs children, const FunctionOrOverloadResolver & function)
|
|
{
|
|
auto it = node_name_to_node.find(node_name);
|
|
if (it != node_name_to_node.end())
|
|
return it->second;
|
|
|
|
const auto * node = &actions_dag.addFunction(function, children, node_name);
|
|
node_name_to_node[node->result_name] = node;
|
|
|
|
return node;
|
|
}
|
|
|
|
const ActionsDAG::Node * addArrayJoinIfNecessary(const std::string & node_name, const ActionsDAG::Node * child)
|
|
{
|
|
auto it = node_name_to_node.find(node_name);
|
|
if (it != node_name_to_node.end())
|
|
return it->second;
|
|
|
|
const auto * node = &actions_dag.addArrayJoin(*child, node_name);
|
|
node_name_to_node[node->result_name] = node;
|
|
|
|
return node;
|
|
}
|
|
|
|
private:
|
|
std::unordered_map<std::string_view, const ActionsDAG::Node *> node_name_to_node;
|
|
ActionsDAG & actions_dag;
|
|
QueryTreeNodePtr scope_node;
|
|
};
|
|
|
|
class PlannerActionsVisitorImpl
|
|
{
|
|
public:
|
|
PlannerActionsVisitorImpl(ActionsDAG & actions_dag,
|
|
const PlannerContextPtr & planner_context_,
|
|
bool use_column_identifier_as_action_node_name_);
|
|
|
|
ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node);
|
|
|
|
private:
|
|
|
|
class Levels
|
|
{
|
|
public:
|
|
explicit Levels(size_t level) { set(level); }
|
|
|
|
void set(size_t level)
|
|
{
|
|
check(level);
|
|
if (level)
|
|
mask |= (uint64_t(1) << (level - 1));
|
|
}
|
|
|
|
void reset(size_t level)
|
|
{
|
|
check(level);
|
|
if (level)
|
|
mask &= ~(uint64_t(1) << (level - 1));
|
|
}
|
|
|
|
void add(Levels levels) { mask |= levels.mask; }
|
|
|
|
size_t max() const { return 64 - getLeadingZeroBits(mask); }
|
|
|
|
private:
|
|
uint64_t mask = 0;
|
|
|
|
void check(size_t level)
|
|
{
|
|
if (level > 64)
|
|
throw Exception(ErrorCodes::INCORRECT_QUERY, "Maximum lambda depth exceeded. Maximum 64.");
|
|
}
|
|
};
|
|
|
|
using NodeNameAndNodeMinLevel = std::pair<std::string, Levels>;
|
|
|
|
NodeNameAndNodeMinLevel visitImpl(QueryTreeNodePtr node);
|
|
|
|
NodeNameAndNodeMinLevel visitColumn(const QueryTreeNodePtr & node);
|
|
|
|
NodeNameAndNodeMinLevel visitConstant(const QueryTreeNodePtr & node);
|
|
|
|
NodeNameAndNodeMinLevel visitLambda(const QueryTreeNodePtr & node);
|
|
|
|
NodeNameAndNodeMinLevel makeSetForInFunction(const QueryTreeNodePtr & node);
|
|
|
|
NodeNameAndNodeMinLevel visitIndexHintFunction(const QueryTreeNodePtr & node);
|
|
|
|
NodeNameAndNodeMinLevel visitFunction(const QueryTreeNodePtr & node);
|
|
|
|
std::vector<ActionsScopeNode> actions_stack;
|
|
std::unordered_map<QueryTreeNodePtr, std::string> node_to_node_name;
|
|
const PlannerContextPtr planner_context;
|
|
ActionNodeNameHelper action_node_name_helper;
|
|
bool use_column_identifier_as_action_node_name;
|
|
};
|
|
|
|
PlannerActionsVisitorImpl::PlannerActionsVisitorImpl(ActionsDAG & actions_dag,
|
|
const PlannerContextPtr & planner_context_,
|
|
bool use_column_identifier_as_action_node_name_)
|
|
: planner_context(planner_context_)
|
|
, action_node_name_helper(node_to_node_name, *planner_context, use_column_identifier_as_action_node_name_)
|
|
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
|
|
{
|
|
actions_stack.emplace_back(actions_dag, nullptr);
|
|
}
|
|
|
|
/// Visits `expression_node`; a ListNode is expanded and each element is visited in order.
/// Every visited expression contributes the node of the outermost DAG that carries its name.
ActionsDAG::NodeRawConstPtrs PlannerActionsVisitorImpl::visit(QueryTreeNodePtr expression_node)
{
    ActionsDAG::NodeRawConstPtrs dag_nodes;

    const auto visit_single = [&](const QueryTreeNodePtr & expression)
    {
        const auto name_and_levels = visitImpl(expression);
        dag_nodes.push_back(actions_stack.front().getNodeOrThrow(name_and_levels.first));
    };

    if (auto * list_node = expression_node->as<ListNode>())
    {
        for (auto & element : list_node->getNodes())
            visit_single(element);
    }
    else
    {
        visit_single(expression_node);
    }

    return dag_nodes;
}
|
|
|
|
/// Dispatches on the node type. Only columns, constants and functions are accepted;
/// anything else results in an UNSUPPORTED_METHOD exception.
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitImpl(QueryTreeNodePtr node)
{
    const auto node_type = node->getNodeType();

    switch (node_type)
    {
        case QueryTreeNodeType::COLUMN:
            return visitColumn(node);
        case QueryTreeNodeType::CONSTANT:
            return visitConstant(node);
        case QueryTreeNodeType::FUNCTION:
            return visitFunction(node);
        default:
            break;
    }

    throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
        "Expected column, constant, function. Actual {} with type: {}",
        node->formatASTForErrorMessage(), node_type);
}
|
|
|
|
/** Registers a column as an input in the scopes of the actions stack, innermost first.
  *
  * A column that has an expression is visited through that expression instead when
  * column identifiers are not used as action node names.
  * If the column's source is a lambda node, registration stops at the scope opened by that
  * lambda and that scope's index is returned as the level; otherwise the level is 0.
  */
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitColumn(const QueryTreeNodePtr & node)
{
    auto column_node_name = action_node_name_helper.calculateActionNodeName(node);
    const auto & column_node = node->as<ColumnNode &>();

    if (!use_column_identifier_as_action_node_name && column_node.hasExpression())
        return visitImpl(column_node.getExpression());

    /// The source does not depend on the scope being inspected, so it is looked up once.
    const auto column_source = column_node.getColumnSourceOrNull();
    const bool source_is_lambda = column_source && column_source->getNodeType() == QueryTreeNodeType::LAMBDA;

    /// Walk from the innermost scope down to scope 0.
    for (size_t level = actions_stack.size(); level-- > 0;)
    {
        auto & scope = actions_stack[level];
        scope.addInputColumnIfNecessary(column_node_name, column_node.getColumnType());

        if (source_is_lambda && scope.getScopeNode().get() == column_source.get())
            return {column_node_name, Levels(level)};
    }

    return {column_node_name, Levels(0)};
}
|
|
|
|
/** Adds a constant to the outermost DAG as a constant column and to every nested scope as an input.
  *
  * Naming mirrors what happens on the other servers of a distributed query, so that headers
  * match in spite of constant folding:
  *  - when AST level optimization is allowed, the name is wrapped in _CAST if needed;
  *  - otherwise, a constant folded from an expression other than a QueryNode is named like on
  *    the initiator (wrapped in _CAST if needed) if it was received from the initiator server,
  *    or by the action name of its source expression if not;
  *  - in all remaining cases the plain <value>_<type> name is used.
  */
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitConstant(const QueryTreeNodePtr & node)
{
    const auto & constant_node = node->as<ConstantNode &>();
    const auto & literal = constant_node.getValue();
    const auto & literal_type = constant_node.getResultType();

    String constant_node_name;
    if (planner_context->isASTLevelOptimizationAllowed())
    {
        constant_node_name = calculateActionNodeNameWithCastIfNeeded(constant_node);
    }
    // Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed.
    else if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY)
    {
        if (constant_node.receivedFromInitiatorServer())
            constant_node_name = calculateActionNodeNameWithCastIfNeeded(constant_node);
        else
            constant_node_name = action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression());
    }
    else
    {
        constant_node_name = calculateConstantActionNodeName(literal, literal_type);
    }

    ColumnWithTypeAndName constant_column;
    constant_column.type = literal_type;
    constant_column.name = constant_node_name;
    constant_column.column = constant_column.type->createColumnConst(1, literal);

    /// The outermost scope holds the constant itself ...
    actions_stack[0].addConstantIfNecessary(constant_node_name, constant_column);

    /// ... and every nested scope receives it as an input.
    for (auto scope = actions_stack.begin() + 1; scope != actions_stack.end(); ++scope)
        scope->addInputConstantColumnIfNecessary(constant_node_name, constant_column);

    return {constant_node_name, Levels(0)};
}
|
|
|
|
/** Compiles a lambda body into its own ActionsDAG and adds a capture function node for it.
  *
  * Steps:
  *  1. a temporary scope for the lambda is pushed and the body expression is visited in it;
  *  2. the body DAG is reduced to its single output and turned into ExpressionActions;
  *  3. the scope is popped, its level is removed from the returned levels, and the highest
  *     remaining level becomes the scope where the capture function is placed;
  *  4. required columns that are not lambda arguments are passed as captured children;
  *  5. scopes above the chosen level receive the lambda as an input column.
  *
  * Throws LOGICAL_ERROR if the lambda has no resolved result type or if that type differs from
  * the result type of the created DAG node.
  */
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitLambda(const QueryTreeNodePtr & node)
{
    auto & lambda = node->as<LambdaNode &>();

    auto lambda_type = lambda.getResultType();
    if (!lambda_type)
        throw Exception(ErrorCodes::LOGICAL_ERROR,
            "Lambda {} is not resolved during query analysis",
            lambda.formatASTForErrorMessage());

    auto & argument_nodes = lambda.getArguments().getNodes();
    const size_t arguments_count = argument_nodes.size();

    NamesAndTypesList arguments_names_and_types;
    for (size_t position = 0; position < arguments_count; ++position)
    {
        const auto & argument_name = lambda.getArgumentNames().at(position);
        auto argument_type = argument_nodes[position]->getResultType();
        arguments_names_and_types.emplace_back(argument_name, std::move(argument_type));
    }

    /// The body is built in a scope of its own, which lives only until pop_back below.
    ActionsDAG body_dag;
    actions_stack.emplace_back(body_dag, node);

    auto body_result = visitImpl(lambda.getExpression());
    const auto & body_node_name = body_result.first;
    auto & levels = body_result.second;

    body_dag.getOutputs().push_back(actions_stack.back().getNodeOrThrow(body_node_name));
    body_dag.removeUnusedActions(Names(1, body_node_name));

    auto actions_settings = ExpressionActionsSettings::fromContext(planner_context->getQueryContext(), CompileExpressions::yes);
    auto lambda_actions = std::make_shared<ExpressionActions>(std::move(body_dag), actions_settings);
    Names required_columns = lambda_actions->getRequiredColumns();

    actions_stack.pop_back();

    /// After the pop, size() is the index the lambda scope had; drop it from the levels.
    levels.reset(actions_stack.size());
    const size_t level = levels.max();

    const auto & argument_names = lambda.getArgumentNames();

    Names captured_columns;
    ActionsDAG::NodeRawConstPtrs captured_nodes;
    for (const auto & required_column : required_columns)
    {
        const bool is_lambda_argument
            = std::find(argument_names.begin(), argument_names.end(), required_column) != argument_names.end();
        if (is_lambda_argument)
            continue;

        captured_nodes.push_back(actions_stack[level].getNodeOrThrow(required_column));
        captured_columns.push_back(required_column);
    }

    auto lambda_node_name = calculateActionNodeName(node, *planner_context);
    auto capture_resolver = std::make_shared<FunctionCaptureOverloadResolver>(
        lambda_actions, captured_columns, arguments_names_and_types, lambda.getExpression()->getResultType(), body_node_name, true);

    // TODO: Pass IFunctionBase here not FunctionCaptureOverloadResolver.
    const auto * capture_node = actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(captured_nodes), capture_resolver);

    if (!lambda_type->equals(*capture_node->result_type))
        throw Exception(ErrorCodes::LOGICAL_ERROR,
            "Lambda resolved type {} is not equal to type from actions DAG {}",
            lambda_type, capture_node->result_type);

    /// Scopes above the one holding the capture function see the lambda as an input column.
    for (size_t depth = level + 1, stack_size = actions_stack.size(); depth < stack_size; ++depth)
        actions_stack[depth].addInputColumnIfNecessary(lambda_node_name, lambda_type);

    return {lambda_node_name, levels};
}
|
|
|
|
/** Finds the prepared set for the second argument of an IN function and exposes it as a column.
  *
  * The set is looked up by the tree hash of the second argument: among subquery and storage sets
  * when that argument is a query, union or table, and among tuple sets (with element types derived
  * from the first argument) otherwise. A missing set is a LOGICAL_ERROR.
  * The resulting column is named by the set key, added as a constant to the outermost scope and as
  * an input to every nested scope.
  */
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::makeSetForInFunction(const QueryTreeNodePtr & node)
{
    const auto & function_node = node->as<FunctionNode &>();
    auto left_argument = function_node.getArguments().getNodes().at(0);
    auto right_argument = function_node.getArguments().getNodes().at(1);

    bool right_is_subquery_or_table = false;
    switch (right_argument->getNodeType())
    {
        case QueryTreeNodeType::QUERY:
        case QueryTreeNodeType::UNION:
        case QueryTreeNodeType::TABLE:
            right_is_subquery_or_table = true;
            break;
        default:
            break;
    }

    DataTypes set_element_types;
    FutureSetPtr future_set;
    auto set_key = right_argument->getTreeHash();

    if (right_is_subquery_or_table)
    {
        future_set = planner_context->getPreparedSets().findSubquery(set_key);
        if (!future_set)
            future_set = planner_context->getPreparedSets().findStorage(set_key);
    }
    else
    {
        set_element_types = {left_argument->getResultType()};

        /// A tuple on the left side is unpacked into its elements, unless it has exactly one element.
        const auto * left_tuple_type = typeid_cast<const DataTypeTuple *>(set_element_types.front().get());
        if (left_tuple_type && left_tuple_type->getElements().size() != 1)
            set_element_types = left_tuple_type->getElements();

        const bool transform_null_in = planner_context->getQueryContext()->getSettingsRef()[Setting::transform_null_in];
        set_element_types = Set::getElementTypes(std::move(set_element_types), transform_null_in);
        future_set = planner_context->getPreparedSets().findTuple(set_key, set_element_types);
    }

    if (!future_set)
        throw Exception(ErrorCodes::LOGICAL_ERROR,
            "No set is registered for key {}",
            PreparedSets::toString(set_key, set_element_types));

    ColumnWithTypeAndName set_column;
    set_column.name = DB::PlannerContext::createSetKey(left_argument->getResultType(), right_argument);
    set_column.type = std::make_shared<DataTypeSet>();

    /// Must be queried before `future_set` is moved into the ColumnSet.
    const bool set_already_built = future_set->get() != nullptr;
    auto column_set = ColumnSet::create(1, std::move(future_set));

    /// A set that already exists is wrapped into a constant column; otherwise the ColumnSet is used as is.
    if (set_already_built)
        set_column.column = ColumnConst::create(std::move(column_set), 1);
    else
        set_column.column = std::move(column_set);

    actions_stack[0].addConstantIfNecessary(set_column.name, set_column);

    for (auto scope = actions_stack.begin() + 1; scope != actions_stack.end(); ++scope)
        scope->addInputConstantColumnIfNecessary(set_column.name, set_column);

    return {set_column.name, Levels(0)};
}
|
|
|
|
/** Handles the `indexHint` function.
  *
  * The arguments are compiled into a separate ActionsDAG with a separate PlannerActionsVisitor;
  * each resulting node becomes an output of that DAG once (duplicates by result name are skipped).
  * The DAG is handed to a FunctionIndexHint, which is added without children to the innermost scope.
  */
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node)
{
    const auto & function_node = node->as<FunctionNode &>();
    auto function_node_name = action_node_name_helper.calculateActionNodeName(node);

    ActionsDAG hint_dag;
    auto & hint_outputs = hint_dag.getOutputs();
    std::unordered_set<std::string_view> seen_output_names;
    PlannerActionsVisitor nested_visitor(planner_context);

    for (const auto & argument : function_node.getArguments())
    {
        auto argument_dag_nodes = nested_visitor.visit(hint_dag, argument);

        for (auto & dag_node : argument_dag_nodes)
        {
            /// insert() reports whether the name was new; only new names become outputs.
            const bool first_occurrence = seen_output_names.insert(dag_node->result_name).second;
            if (first_occurrence)
                hint_outputs.push_back(dag_node);
        }
    }

    auto index_hint_function = std::make_shared<FunctionIndexHint>();
    index_hint_function->setActions(std::move(hint_dag));
    auto index_hint_resolver = std::make_shared<FunctionToOverloadResolverAdaptor>(std::move(index_hint_function));

    /// The function goes into the innermost (last) scope.
    const size_t innermost_level = actions_stack.size() - 1;
    actions_stack[innermost_level].addFunctionIfNecessary(function_node_name, {}, index_hint_resolver);

    return {function_node_name, Levels(innermost_level)};
}
|
|
|
|
/* Builds the action for a function node.
 *
 * `indexHint` is delegated to visitIndexHintFunction. For IN functions the set column
 * produced by makeSetForInFunction is used in place of the second argument. Every other
 * argument is visited (lambdas through visitLambda, the rest through visitImpl), and the
 * function is added at the maximum level among its arguments' levels; all deeper levels
 * receive the result as an input column.
 *
 * Returns the action node name of the function and the accumulated levels of its arguments
 * (or Levels(0) when the function is treated as an already computed input).
 */
PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitFunction(const QueryTreeNodePtr & node)
{
    const auto & function_node = node->as<FunctionNode &>();
    const auto & function_name = function_node.getFunctionName();

    if (function_name == "indexHint")
        return visitIndexHintFunction(node);

    /// For IN functions the set is prepared first; its column replaces the second argument below.
    std::optional<NodeNameAndNodeMinLevel> in_set_argument;
    if (isNameOfInFunction(function_name))
        in_set_argument = makeSetForInFunction(node);

    auto function_node_name = action_node_name_helper.calculateActionNodeName(node);

    /* Aggregate functions, window functions and GROUP BY expressions have been analyzed by earlier steps.
     * An expression that was already visited must not be visited again, and neither must its arguments.
     * For example, an expression of the aggregation step may appear in the projection as well:
     * SELECT foo(a, b, c) as x FROM table GROUP BY foo(a, b, c)
     * Here `a`, `b`, `c` should not be analyzed a second time.
     * Doing so could even produce an error when the arguments contain arrayJoin, because it would be calculated twice.
     */
    const bool already_computed = function_node.isAggregateFunction() || function_node.isWindowFunction()
        || actions_stack.front().containsInputNode(function_node_name);
    if (already_computed)
    {
        for (auto & scope : actions_stack)
            scope.addInputColumnIfNecessary(function_node_name, function_node.getResultType());

        return {function_node_name, Levels(0)};
    }

    const auto & arguments = function_node.getArguments().getNodes();
    const size_t arguments_count = arguments.size();

    Names argument_names;
    argument_names.reserve(arguments_count);

    Levels levels(0);
    for (size_t index = 0; index < arguments_count; ++index)
    {
        const auto & argument = arguments[index];
        const bool use_prepared_set = in_set_argument.has_value() && index == 1;

        if (use_prepared_set)
        {
            /// Second argument of IN: take the already prepared set column instead of visiting the node.
            auto & [set_name, set_levels] = *in_set_argument;
            argument_names.push_back(std::move(set_name));
            levels.add(set_levels);
        }
        else if (argument->getNodeType() == QueryTreeNodeType::LAMBDA)
        {
            auto [lambda_name, lambda_levels] = visitLambda(argument);
            argument_names.push_back(std::move(lambda_name));
            levels.add(lambda_levels);
        }
        else
        {
            auto [argument_name, argument_levels] = visitImpl(argument);
            argument_names.push_back(std::move(argument_name));
            levels.add(argument_levels);
        }
    }

    ActionsDAG::NodeRawConstPtrs children;
    children.reserve(arguments_count);

    /// The function is added at the maximum level among its arguments' levels.
    const size_t level = levels.max();
    auto & target_scope = actions_stack[level];
    for (const auto & argument_name : argument_names)
        children.push_back(target_scope.getNodeOrThrow(argument_name));

    if (function_name == "arrayJoin")
    {
        /// arrayJoin is only accepted at level 0.
        if (level != 0)
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
                "Expression in arrayJoin cannot depend on lambda argument: {} ",
                argument_names.at(0));

        target_scope.addArrayJoinIfNecessary(function_node_name, children.at(0));
    }
    else
    {
        target_scope.addFunctionIfNecessary(function_node_name, children, function_node);
    }

    /// Levels after the one holding the function receive its result as an input column.
    for (size_t i = level + 1; i < actions_stack.size(); ++i)
        actions_stack[i].addInputColumnIfNecessary(function_node_name, function_node.getResultType());

    return {function_node_name, levels};
}
|
|
|
|
}
|
|
|
|
PlannerActionsVisitor::PlannerActionsVisitor(const PlannerContextPtr & planner_context_, bool use_column_identifier_as_action_node_name_)
|
|
: planner_context(planner_context_)
|
|
, use_column_identifier_as_action_node_name(use_column_identifier_as_action_node_name_)
|
|
{}
|
|
|
|
/// Visits `expression_node` with a freshly constructed PlannerActionsVisitorImpl bound to `actions_dag`
/// and returns the nodes that implementation reports.
ActionsDAG::NodeRawConstPtrs PlannerActionsVisitor::visit(ActionsDAG & actions_dag, QueryTreeNodePtr expression_node)
{
    /// The implementation object is only needed for this single call.
    return PlannerActionsVisitorImpl(actions_dag, planner_context, use_column_identifier_as_action_node_name)
        .visit(expression_node);
}
|
|
|
|
/// Calculates the action node name of `node` via ActionNodeNameHelper,
/// using the caller-provided `node_to_name` map.
String calculateActionNodeName(
    const QueryTreeNodePtr & node,
    const PlannerContext & planner_context,
    QueryTreeNodeToName & node_to_name,
    bool use_column_identifier_as_action_node_name)
{
    return ActionNodeNameHelper(node_to_name, planner_context, use_column_identifier_as_action_node_name)
        .calculateActionNodeName(node);
}
|
|
|
|
/// Calculates the action node name of `node` via ActionNodeNameHelper,
/// backed by a default-constructed, function-local QueryTreeNodeToName map.
String calculateActionNodeName(
    const QueryTreeNodePtr & node,
    const PlannerContext & planner_context,
    bool use_column_identifier_as_action_node_name)
{
    /// Must outlive the helper, which receives it as its first constructor argument.
    QueryTreeNodeToName local_node_to_name;
    return ActionNodeNameHelper(local_node_to_name, planner_context, use_column_identifier_as_action_node_name)
        .calculateActionNodeName(node);
}
|
|
|
|
/// Free-function wrapper around ActionNodeNameHelper::calculateConstantActionNodeName
/// for a constant literal with an explicit type.
String calculateConstantActionNodeName(const Field & constant_literal, const DataTypePtr & constant_type)
{
    String constant_name = ActionNodeNameHelper::calculateConstantActionNodeName(constant_literal, constant_type);
    return constant_name;
}
|
|
|
|
/// Free-function wrapper around the single-argument
/// ActionNodeNameHelper::calculateConstantActionNodeName overload.
String calculateConstantActionNodeName(const Field & constant_literal)
{
    String constant_name = ActionNodeNameHelper::calculateConstantActionNodeName(constant_literal);
    return constant_name;
}
|
|
|
|
/// Calculates the action name for `window_node` in combination with `function_node` via
/// ActionNodeNameHelper, backed by a default-constructed, function-local QueryTreeNodeToName map.
String calculateWindowNodeActionName(
    const QueryTreeNodePtr & function_node,
    const QueryTreeNodePtr & window_node,
    const PlannerContext & planner_context,
    bool use_column_identifier_as_action_node_name)
{
    /// Must outlive the helper, which receives it as its first constructor argument.
    QueryTreeNodeToName local_node_to_name;
    return ActionNodeNameHelper(local_node_to_name, planner_context, use_column_identifier_as_action_node_name)
        .calculateWindowNodeActionName(function_node, window_node);
}
|
|
|
|
}
|