2020-11-16 14:57:56 +00:00
|
|
|
#include <Interpreters/ActionsDAG.h>
|
|
|
|
|
|
|
|
#include <DataTypes/DataTypeArray.h>
|
2020-11-17 14:51:05 +00:00
|
|
|
#include <DataTypes/DataTypeString.h>
|
2020-11-16 14:57:56 +00:00
|
|
|
#include <Functions/IFunction.h>
|
2020-11-17 14:51:05 +00:00
|
|
|
#include <Functions/IFunctionAdaptors.h>
|
|
|
|
#include <Functions/FunctionsConversion.h>
|
2020-11-17 19:43:26 +00:00
|
|
|
#include <Functions/materialize.h>
|
2021-02-10 16:26:49 +00:00
|
|
|
#include <Functions/FunctionsLogical.h>
|
2020-11-16 14:57:56 +00:00
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Interpreters/ExpressionJIT.h>
|
|
|
|
#include <IO/WriteBufferFromString.h>
|
|
|
|
#include <IO/Operators.h>
|
|
|
|
|
|
|
|
#include <stack>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced from this translation unit.
/// They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int DUPLICATE_COLUMN;
    extern const int ILLEGAL_COLUMN;
    extern const int LOGICAL_ERROR;
    extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
    extern const int THERE_IS_NO_COLUMN;
    extern const int TYPE_MISMATCH;
    extern const int UNKNOWN_IDENTIFIER;
}
|
|
|
|
|
|
|
|
|
2020-11-17 12:39:41 +00:00
|
|
|
/// Creates a DAG containing one INPUT node per (name, type) pair, in the order given.
ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs_)
{
    for (const auto & name_and_type : inputs_)
        addInput(name_and_type.name, name_and_type.type);
}
|
|
|
|
|
2020-11-17 12:39:41 +00:00
|
|
|
/// Creates a DAG with one INPUT node per header column, in the order given.
/// Columns that carry a constant value additionally get a COLUMN node (see below).
ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
{
    for (const auto & header_column : inputs_)
    {
        const bool is_constant = header_column.column && isColumnConst(*header_column.column);

        if (!is_constant)
        {
            /// Ordinary column: only name and type are recorded.
            addInput(header_column.name, header_column.type);
            continue;
        }

        addInput(header_column);

        /// The constant is also added as a separate COLUMN node.
        /// This makes it possible to remove an input which is actually constant (after projection).
        /// Some transforms of the query pipeline may also materialize constants at random,
        /// ignoring the header structure; keeping the constant here lets us drop such a
        /// materialized column and use the constant value from the header instead.
        /// The input itself cannot be removed right now because input positions matter in some cases.
        addColumn(header_column);
    }
}
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
/// Appends `node` to the node storage and returns a reference to the stored copy.
/// INPUT nodes are additionally registered in `inputs`.
/// NOTE(review): the former duplicate-name check and index registration were commented out
/// in this function, so neither is performed here — callers presumably maintain `index` themselves.
ActionsDAG::Node & ActionsDAG::addNode(Node node)
{
    nodes.emplace_back(std::move(node));
    Node & added = nodes.back();

    if (added.type == ActionType::INPUT)
        inputs.emplace_back(&added);

    return added;
}
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
// ActionsDAG::Node & ActionsDAG::getNode(const std::string & name)
|
|
|
|
// {
|
|
|
|
// auto it = index.find(name);
|
|
|
|
// if (it == index.end())
|
|
|
|
// throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
|
2020-11-16 14:57:56 +00:00
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
// return **it;
|
|
|
|
// }
|
2020-11-16 14:57:56 +00:00
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
/// Adds an INPUT node with the given name and type (no column attached).
/// Returns a reference to the stored node.
const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type)
{
    Node input_node;
    input_node.type = ActionType::INPUT;
    input_node.result_name = std::move(name);
    input_node.result_type = std::move(type);

    return addNode(std::move(input_node));
}
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
/// Adds an INPUT node that takes its name, type and column from `column`.
/// Returns a reference to the stored node.
const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column)
{
    Node input_node;
    input_node.type = ActionType::INPUT;
    input_node.result_name = std::move(column.name);
    input_node.result_type = std::move(column.type);
    input_node.column = std::move(column.column);

    return addNode(std::move(input_node));
}
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
/// Adds a COLUMN node holding the given column.
/// Throws LOGICAL_ERROR if `column.column` is null.
/// Returns a reference to the stored node.
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column)
{
    if (!column.column)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name);

    Node column_node;
    column_node.type = ActionType::COLUMN;
    column_node.result_name = std::move(column.name);
    column_node.result_type = std::move(column.type);
    column_node.column = std::move(column.column);

    /// NOTE(review): an optional "materialize" step used to follow here but was commented out;
    /// the node is returned as is.
    return addNode(std::move(column_node));
}
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
/// Adds an ALIAS node named `alias` whose only child is `child`.
/// The alias copies the child's result type and column.
/// Returns a reference to the stored node.
const ActionsDAG::Node & ActionsDAG::addAlias(const Node & child, std::string alias)
{
    Node alias_node;
    alias_node.type = ActionType::ALIAS;
    alias_node.result_name = std::move(alias);
    alias_node.result_type = child.result_type;
    alias_node.column = child.column;
    alias_node.children.emplace_back(&child);

    return addNode(std::move(alias_node));
}
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
/// Adds an ARRAY_JOIN node over `child`.
/// The child's result type must be an array; the node's result type is the array's nested type.
/// Throws TYPE_MISMATCH if the child is not an array.
const ActionsDAG::Node & ActionsDAG::addArrayJoin(const Node & child, std::string result_name)
{
    const auto * array_type = typeid_cast<const DataTypeArray *>(child.result_type.get());
    if (array_type == nullptr)
        throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);

    Node array_join_node;
    array_join_node.type = ActionType::ARRAY_JOIN;
    array_join_node.result_name = std::move(result_name);
    array_join_node.result_type = array_type->getNestedType();
    array_join_node.children.emplace_back(&child);

    return addNode(std::move(array_join_node));
}
|
|
|
|
|
|
|
|
/// Adds a FUNCTION node that applies `function` to `children`.
///
/// The function is built and prepared from the children's (column, type, name) triples.
/// If every argument is a constant column and the function is suitable for constant folding,
/// the function is executed right away and a constant result is stored in the node.
/// If `result_name` is empty, the name "func(arg1, arg2, ...)" is generated from the children's names.
/// Returns a reference to the stored node.
const ActionsDAG::Node & ActionsDAG::addFunction(
    const FunctionOverloadResolverPtr & function,
    NodeRawConstPtrs children,
    std::string result_name)
{
    Node function_node;
    function_node.type = ActionType::FUNCTION;
    function_node.function_builder = function;
    function_node.children = std::move(children);

    /// Collect the argument descriptions and check whether all of them are constants.
    ColumnsWithTypeAndName arguments;
    arguments.reserve(function_node.children.size());

    bool all_const = true;
    for (const auto * child : function_node.children)
    {
        arguments.emplace_back(child->column, child->result_type, child->result_name);

        const auto & argument_column = arguments.back().column;
        if (!argument_column || !isColumnConst(*argument_column))
            all_const = false;
    }

    function_node.function_base = function->build(arguments);
    function_node.result_type = function_node.function_base->getResultType();
    function_node.function = function_node.function_base->prepare(arguments);

    /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function.
    if (all_const && function_node.function_base->isSuitableForConstantFolding())
    {
        size_t num_rows = 0;
        if (!arguments.empty())
            num_rows = arguments.front().column->size();

        auto folded = function_node.function->execute(arguments, function_node.result_type, num_rows, true);

        /// If the result is not a constant, just in case, we will consider the result as unknown.
        if (isColumnConst(*folded))
        {
            /// All constant (literal) columns in block are added with size 1.
            /// But if there was no columns in block before executing a function, the result has size 0.
            /// Change the size to 1.
            if (folded->empty())
                folded = folded->cloneResized(1);

            function_node.column = std::move(folded);
        }
    }

    /// Some functions like ignore() or getTypeName() always return constant result even if arguments are not constant.
    /// We can't do constant folding, but can specify in sample block that function result is constant to avoid
    /// unnecessary materialization.
    if (!function_node.column && function_node.function_base->isSuitableForConstantFolding())
    {
        if (auto always_constant = function_node.function_base->getResultIfAlwaysReturnsConstantAndHasArguments(arguments))
        {
            function_node.column = std::move(always_constant);
            function_node.allow_constant_folding = false;
        }
    }

    /// Generate a default name from the function name and the argument names.
    if (result_name.empty())
    {
        result_name = function->getName() + "(";

        bool is_first_argument = true;
        for (const auto * child : function_node.children)
        {
            if (!is_first_argument)
                result_name += ", ";
            is_first_argument = false;

            result_name += child->result_name;
        }

        result_name += ")";
    }

    function_node.result_name = std::move(result_name);

    return addNode(std::move(function_node));
}
|
|
|
|
|
2020-11-17 07:03:11 +00:00
|
|
|
|
2020-11-16 14:57:56 +00:00
|
|
|
/// Returns the (name, type) pairs of all INPUT nodes, in the order they are stored in `inputs`.
NamesAndTypesList ActionsDAG::getRequiredColumns() const
{
    NamesAndTypesList required;

    for (const auto * input_node : inputs)
        required.emplace_back(input_node->result_name, input_node->result_type);

    return required;
}
|
|
|
|
|
|
|
|
/// Returns (column, type, name) for every node listed in `index`, preserving the index order.
ColumnsWithTypeAndName ActionsDAG::getResultColumns() const
{
    ColumnsWithTypeAndName columns;
    columns.reserve(index.size());

    for (const auto * output : index)
        columns.emplace_back(output->column, output->result_type, output->result_name);

    return columns;
}
|
|
|
|
|
|
|
|
/// Returns (name, type) for every node listed in `index`, preserving the index order.
NamesAndTypesList ActionsDAG::getNamesAndTypesList() const
{
    NamesAndTypesList names_and_types;

    for (const auto * output : index)
        names_and_types.emplace_back(output->result_name, output->result_type);

    return names_and_types;
}
|
|
|
|
|
|
|
|
/// Returns the result names of the nodes listed in `index`, preserving the index order.
Names ActionsDAG::getNames() const
{
    Names output_names;
    output_names.reserve(index.size());

    for (const auto * output : index)
        output_names.emplace_back(output->result_name);

    return output_names;
}
|
|
|
|
|
|
|
|
std::string ActionsDAG::dumpNames() const
|
|
|
|
{
|
|
|
|
WriteBufferFromOwnString out;
|
|
|
|
for (auto it = nodes.begin(); it != nodes.end(); ++it)
|
|
|
|
{
|
|
|
|
if (it != nodes.begin())
|
|
|
|
out << ", ";
|
|
|
|
out << it->result_name;
|
|
|
|
}
|
|
|
|
return out.str();
|
|
|
|
}
|
|
|
|
|
|
|
|
void ActionsDAG::removeUnusedActions(const Names & required_names)
|
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
NodeRawConstPtrs required_nodes;
|
2020-11-16 14:57:56 +00:00
|
|
|
required_nodes.reserve(required_names.size());
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
std::unordered_map<std::string_view, std::list<const Node *>> names_map;
|
|
|
|
for (const auto * node : index)
|
|
|
|
names_map[node->result_name].push_back(node);
|
|
|
|
|
2020-11-16 14:57:56 +00:00
|
|
|
for (const auto & name : required_names)
|
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
auto & nodes_list = names_map[name];
|
|
|
|
if (nodes_list.empty())
|
2020-11-16 14:57:56 +00:00
|
|
|
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
|
|
|
|
"Unknown column: {}, there are only columns {}", name, dumpNames());
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
required_nodes.push_back(nodes_list.front());
|
|
|
|
nodes_list.pop_back();
|
2020-11-16 14:57:56 +00:00
|
|
|
}
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
index.swap(required_nodes);
|
2020-11-16 14:57:56 +00:00
|
|
|
removeUnusedActions();
|
|
|
|
}
|
|
|
|
|
2021-02-10 16:26:49 +00:00
|
|
|
void ActionsDAG::removeUnusedActions(bool allow_remove_inputs)
|
2020-11-16 14:57:56 +00:00
|
|
|
{
|
|
|
|
std::unordered_set<const Node *> visited_nodes;
|
|
|
|
std::stack<Node *> stack;
|
|
|
|
|
|
|
|
for (auto * node : index)
|
|
|
|
{
|
|
|
|
visited_nodes.insert(node);
|
2021-03-02 17:08:59 +00:00
|
|
|
stack.push(const_cast<Node *>(node));
|
2020-11-16 14:57:56 +00:00
|
|
|
}
|
|
|
|
|
2020-11-27 09:11:34 +00:00
|
|
|
for (auto & node : nodes)
|
|
|
|
{
|
2021-02-03 10:29:44 +00:00
|
|
|
/// We cannot remove function with side effects even if it returns constant (e.g. ignore(...)).
|
|
|
|
bool prevent_constant_folding = node.column && isColumnConst(*node.column) && !node.allow_constant_folding;
|
|
|
|
/// We cannot remove arrayJoin because it changes the number of rows.
|
|
|
|
bool is_array_join = node.type == ActionType::ARRAY_JOIN;
|
|
|
|
|
|
|
|
bool must_keep_node = is_array_join || prevent_constant_folding;
|
|
|
|
if (must_keep_node && visited_nodes.count(&node) == 0)
|
2020-11-27 09:11:34 +00:00
|
|
|
{
|
|
|
|
visited_nodes.insert(&node);
|
|
|
|
stack.push(&node);
|
|
|
|
}
|
2021-02-10 16:26:49 +00:00
|
|
|
|
|
|
|
if (node.type == ActionType::INPUT && !allow_remove_inputs)
|
|
|
|
visited_nodes.insert(&node);
|
2020-11-27 09:11:34 +00:00
|
|
|
}
|
|
|
|
|
2020-11-16 14:57:56 +00:00
|
|
|
while (!stack.empty())
|
|
|
|
{
|
|
|
|
auto * node = stack.top();
|
|
|
|
stack.pop();
|
|
|
|
|
|
|
|
if (!node->children.empty() && node->column && isColumnConst(*node->column) && node->allow_constant_folding)
|
|
|
|
{
|
|
|
|
/// Constant folding.
|
|
|
|
node->type = ActionsDAG::ActionType::COLUMN;
|
|
|
|
node->children.clear();
|
|
|
|
}
|
|
|
|
|
2021-03-02 17:51:54 +00:00
|
|
|
for (const auto * child : node->children)
|
2020-11-16 14:57:56 +00:00
|
|
|
{
|
|
|
|
if (visited_nodes.count(child) == 0)
|
|
|
|
{
|
2021-03-02 17:51:54 +00:00
|
|
|
stack.push(const_cast<Node *>(child));
|
2020-11-16 14:57:56 +00:00
|
|
|
visited_nodes.insert(child);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
nodes.remove_if([&](const Node & node) { return visited_nodes.count(&node) == 0; });
|
2020-11-17 12:34:31 +00:00
|
|
|
auto it = std::remove_if(inputs.begin(), inputs.end(), [&](const Node * node) { return visited_nodes.count(node) == 0; });
|
|
|
|
inputs.erase(it, inputs.end());
|
2020-11-16 14:57:56 +00:00
|
|
|
}
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
void ActionsDAG::addAliases(const NamesWithAliases & aliases, bool project)
|
2020-11-16 14:57:56 +00:00
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
std::unordered_map<std::string_view, std::list<const Node *>> names_map;
|
|
|
|
for (const auto * node : index)
|
|
|
|
names_map[node->result_name].push_back(node);
|
|
|
|
|
|
|
|
NodeRawConstPtrs required_nodes;
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
for (const auto & item : aliases)
|
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
auto & nodes_list = names_map[item.first];
|
|
|
|
if (nodes_list.empty())
|
|
|
|
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
|
2021-03-02 17:51:54 +00:00
|
|
|
"Unknown column: {}, there are only columns {}", item.first, dumpNames());
|
2021-03-02 17:08:59 +00:00
|
|
|
|
|
|
|
const auto * child = nodes_list.front();
|
|
|
|
nodes_list.pop_front();
|
|
|
|
required_nodes.push_back(child);
|
2020-11-16 14:57:56 +00:00
|
|
|
}
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
if (project)
|
|
|
|
index.clear();
|
|
|
|
|
|
|
|
index.reserve(index.size() + aliases.size());
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
for (size_t i = 0; i < aliases.size(); ++i)
|
|
|
|
{
|
|
|
|
const auto & item = aliases[i];
|
2021-03-02 17:08:59 +00:00
|
|
|
const auto * child = required_nodes[i];
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
if (!item.second.empty() && item.first != item.second)
|
|
|
|
{
|
|
|
|
Node node;
|
|
|
|
node.type = ActionType::ALIAS;
|
|
|
|
node.result_type = child->result_type;
|
|
|
|
node.result_name = std::move(item.second);
|
|
|
|
node.column = child->column;
|
|
|
|
node.children.emplace_back(child);
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
auto & alias = addNode(std::move(node));
|
|
|
|
index.push_back(&alias);
|
2020-11-16 14:57:56 +00:00
|
|
|
}
|
2021-03-02 17:08:59 +00:00
|
|
|
else if (project)
|
|
|
|
index.push_back(child);
|
2020-11-16 14:57:56 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void ActionsDAG::addAliases(const NamesWithAliases & aliases)
|
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
addAliases(aliases, false);
|
2020-11-16 14:57:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void ActionsDAG::project(const NamesWithAliases & projection)
|
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
addAliases(projection, true);
|
|
|
|
removeUnusedActions();
|
2020-11-16 14:57:56 +00:00
|
|
|
projectInput();
|
2021-03-02 17:08:59 +00:00
|
|
|
projected_output = true;
|
2020-11-16 14:57:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool ActionsDAG::tryRestoreColumn(const std::string & column_name)
|
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
for (const auto * node : index)
|
|
|
|
if (node->result_name == column_name)
|
|
|
|
return true;
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
for (auto it = nodes.rbegin(); it != nodes.rend(); ++it)
|
|
|
|
{
|
|
|
|
auto & node = *it;
|
|
|
|
if (node.result_name == column_name)
|
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
index.push_back(&node);
|
2020-11-16 14:57:56 +00:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-02-04 11:44:00 +00:00
|
|
|
bool ActionsDAG::removeUnusedResult(const std::string & column_name)
|
2021-01-19 10:03:25 +00:00
|
|
|
{
|
2021-02-04 11:44:00 +00:00
|
|
|
/// Find column in index and remove.
|
|
|
|
const Node * col;
|
|
|
|
{
|
|
|
|
auto it = index.begin();
|
|
|
|
for (; it != index.end(); ++it)
|
|
|
|
if ((*it)->result_name == column_name)
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (it == index.end())
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found result {} in ActionsDAG\n{}", column_name, dumpDAG());
|
|
|
|
|
|
|
|
col = *it;
|
2021-03-02 17:08:59 +00:00
|
|
|
index.erase(it);
|
2021-02-04 11:44:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Check if column is in input.
|
2021-01-19 10:03:25 +00:00
|
|
|
auto it = inputs.begin();
|
|
|
|
for (; it != inputs.end(); ++it)
|
2021-02-04 11:44:00 +00:00
|
|
|
if (*it == col)
|
2021-01-19 10:03:25 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
if (it == inputs.end())
|
2021-02-04 11:44:00 +00:00
|
|
|
return false;
|
2021-01-19 10:03:25 +00:00
|
|
|
|
2021-02-04 11:44:00 +00:00
|
|
|
/// Check column has no dependent.
|
2021-01-19 10:03:25 +00:00
|
|
|
for (const auto & node : nodes)
|
|
|
|
for (const auto * child : node.children)
|
2021-02-04 11:44:00 +00:00
|
|
|
if (col == child)
|
|
|
|
return false;
|
2021-01-19 10:03:25 +00:00
|
|
|
|
2021-02-12 15:20:54 +00:00
|
|
|
/// Do not remove input if it was mentioned in index several times.
|
|
|
|
for (const auto * node : index)
|
|
|
|
if (col == node)
|
|
|
|
return false;
|
|
|
|
|
2021-02-04 11:44:00 +00:00
|
|
|
/// Remove from nodes and inputs.
|
2021-01-19 11:48:09 +00:00
|
|
|
for (auto jt = nodes.begin(); jt != nodes.end(); ++jt)
|
|
|
|
{
|
2021-02-04 11:44:00 +00:00
|
|
|
if (&(*jt) == *it)
|
2021-01-19 11:48:09 +00:00
|
|
|
{
|
|
|
|
nodes.erase(jt);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-19 10:03:25 +00:00
|
|
|
inputs.erase(it);
|
2021-02-04 11:44:00 +00:00
|
|
|
return true;
|
2021-01-19 10:03:25 +00:00
|
|
|
}
|
|
|
|
|
2020-11-16 14:57:56 +00:00
|
|
|
/// Creates a deep copy of the DAG.
///
/// Every node is copied, and all pointers stored inside the copy (children of each node,
/// `index`, `inputs`) are redirected to the copied nodes, so the result does not reference
/// nodes of this DAG. The `project_input` and `projected_output` flags are copied too.
ActionsDAGPtr ActionsDAG::clone() const
{
    auto actions = std::make_shared<ActionsDAG>();
    actions->project_input = project_input;
    actions->projected_output = projected_output;

    /// Maps a node of this DAG to its counterpart in the clone.
    std::unordered_map<const Node *, Node *> copy_map;

    for (const auto & node : nodes)
    {
        auto & duplicate = actions->nodes.emplace_back(node);
        copy_map[&node] = &duplicate;
    }

    /// The copied nodes still point to children in the original DAG; redirect them.
    for (auto & node : actions->nodes)
        for (auto & child : node.children)
            child = copy_map[child];

    for (const auto & node : index)
        actions->index.push_back(copy_map[node]);

    for (const auto & node : inputs)
        actions->inputs.push_back(copy_map[node]);

    return actions;
}
|
|
|
|
|
2021-03-02 17:51:54 +00:00
|
|
|
void ActionsDAG::compileExpressions(std::shared_ptr<CompiledExpressionCache> cache)
|
2020-11-16 14:57:56 +00:00
|
|
|
{
|
2021-03-02 17:51:54 +00:00
|
|
|
compileFunctions(cache);
|
|
|
|
removeUnusedActions();
|
2020-11-16 14:57:56 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
std::string ActionsDAG::dumpDAG() const
|
|
|
|
{
|
|
|
|
std::unordered_map<const Node *, size_t> map;
|
|
|
|
for (const auto & node : nodes)
|
|
|
|
{
|
|
|
|
size_t idx = map.size();
|
|
|
|
map[&node] = idx;
|
|
|
|
}
|
|
|
|
|
|
|
|
WriteBufferFromOwnString out;
|
|
|
|
for (const auto & node : nodes)
|
|
|
|
{
|
|
|
|
out << map[&node] << " : ";
|
|
|
|
switch (node.type)
|
|
|
|
{
|
|
|
|
case ActionsDAG::ActionType::COLUMN:
|
|
|
|
out << "COLUMN ";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ActionsDAG::ActionType::ALIAS:
|
|
|
|
out << "ALIAS ";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ActionsDAG::ActionType::FUNCTION:
|
|
|
|
out << "FUNCTION ";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ActionsDAG::ActionType::ARRAY_JOIN:
|
|
|
|
out << "ARRAY JOIN ";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ActionsDAG::ActionType::INPUT:
|
|
|
|
out << "INPUT ";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
out << "(";
|
|
|
|
for (size_t i = 0; i < node.children.size(); ++i)
|
|
|
|
{
|
|
|
|
if (i)
|
|
|
|
out << ", ";
|
|
|
|
out << map[node.children[i]];
|
|
|
|
}
|
|
|
|
out << ")";
|
|
|
|
|
|
|
|
out << " " << (node.column ? node.column->getName() : "(no column)");
|
|
|
|
out << " " << (node.result_type ? node.result_type->getName() : "(no type)");
|
|
|
|
out << " " << (!node.result_name.empty() ? node.result_name : "(no name)");
|
|
|
|
if (node.function_base)
|
|
|
|
out << " [" << node.function_base->getName() << "]";
|
|
|
|
|
|
|
|
out << "\n";
|
|
|
|
}
|
|
|
|
|
2021-01-12 18:47:54 +00:00
|
|
|
out << "Index:";
|
|
|
|
for (const auto * node : index)
|
|
|
|
out << ' ' << map[node];
|
|
|
|
out << '\n';
|
|
|
|
|
2020-11-16 14:57:56 +00:00
|
|
|
return out.str();
|
|
|
|
}
|
|
|
|
|
|
|
|
bool ActionsDAG::hasArrayJoin() const
|
|
|
|
{
|
|
|
|
for (const auto & node : nodes)
|
|
|
|
if (node.type == ActionType::ARRAY_JOIN)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-11-27 09:50:56 +00:00
|
|
|
bool ActionsDAG::hasStatefulFunctions() const
|
|
|
|
{
|
|
|
|
for (const auto & node : nodes)
|
|
|
|
if (node.type == ActionType::FUNCTION && node.function_base->isStateful())
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-01-28 11:00:24 +00:00
|
|
|
bool ActionsDAG::trivial() const
|
2020-11-16 14:57:56 +00:00
|
|
|
{
|
|
|
|
for (const auto & node : nodes)
|
2021-01-28 11:00:24 +00:00
|
|
|
if (node.type == ActionType::FUNCTION || node.type == ActionType::ARRAY_JOIN)
|
2020-11-16 14:57:56 +00:00
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-02-04 20:36:50 +00:00
|
|
|
void ActionsDAG::addMaterializingOutputActions()
|
|
|
|
{
|
|
|
|
FunctionOverloadResolverPtr func_builder_materialize =
|
|
|
|
std::make_shared<FunctionOverloadResolverAdaptor>(
|
|
|
|
std::make_unique<DefaultOverloadResolver>(
|
|
|
|
std::make_shared<FunctionMaterialize>()));
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
for (auto & node : index)
|
2021-02-04 20:36:50 +00:00
|
|
|
{
|
|
|
|
auto & name = node->result_name;
|
2021-03-02 17:08:59 +00:00
|
|
|
node = &addFunction(func_builder_materialize, {node}, {});
|
|
|
|
node = &addAlias(*node, name);
|
2021-02-04 20:36:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-17 14:51:05 +00:00
|
|
|
/// Builds a DAG that converts a block with structure `source` into a block with structure `result`.
///
/// Source columns are matched to result columns either by position or by name (`mode`).
/// For every result column the following is added when needed:
///  - a CAST to the result type, if the types differ;
///  - `materialize`, if the source is constant but the result column is not;
///  - an alias, if the names differ.
/// If the result column is constant, the source must be constant too and, unless
/// `ignore_constant_values` is set, must hold the same value; with the flag set the
/// result constant is used instead of the source one.
///
/// Throws NUMBER_OF_COLUMNS_DOESNT_MATCH (position mode, different column counts),
/// THERE_IS_NO_COLUMN (name mode, missing source column) or ILLEGAL_COLUMN (constant mismatch).
ActionsDAGPtr ActionsDAG::makeConvertingActions(
    const ColumnsWithTypeAndName & source,
    const ColumnsWithTypeAndName & result,
    MatchColumnsMode mode,
    bool ignore_constant_values)
{
    const size_t num_input_columns = source.size();
    const size_t num_result_columns = result.size();

    if (mode == MatchColumnsMode::Position && num_input_columns != num_result_columns)
        throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);

    auto actions_dag = std::make_shared<ActionsDAG>(source);
    NodeRawConstPtrs projection(num_result_columns);

    FunctionOverloadResolverPtr func_builder_materialize =
        std::make_shared<FunctionOverloadResolverAdaptor>(
            std::make_unique<DefaultOverloadResolver>(
                std::make_shared<FunctionMaterialize>()));

    /// For name matching: positions of the inputs with each name, in input order.
    std::map<std::string_view, std::list<size_t>> positions_by_name;
    if (mode == MatchColumnsMode::Name)
    {
        size_t pos = 0;
        for (const auto * input_node : actions_dag->inputs)
            positions_by_name[input_node->result_name].push_back(pos++);
    }

    for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num)
    {
        const auto & res_elem = result[result_col_num];
        const Node * src_node = nullptr;

        /// Pick the source node for this result column.
        if (mode == MatchColumnsMode::Position)
        {
            src_node = actions_dag->inputs[result_col_num];
        }
        else if (mode == MatchColumnsMode::Name)
        {
            auto & candidates = positions_by_name[res_elem.name];
            if (candidates.empty())
                throw Exception("Cannot find column " + backQuote(res_elem.name) + " in source stream",
                                ErrorCodes::THERE_IS_NO_COLUMN);

            /// Inputs with equal names are consumed in order.
            src_node = actions_dag->inputs[candidates.front()];
            candidates.pop_front();
        }

        /// Check constants.
        if (const auto * res_const = typeid_cast<const ColumnConst *>(res_elem.column.get()))
        {
            const auto * src_const = typeid_cast<const ColumnConst *>(src_node->column.get());
            if (!src_const)
                throw Exception("Cannot convert column " + backQuote(res_elem.name) + " because "
                                "it is non constant in source stream but must be constant in result",
                                ErrorCodes::ILLEGAL_COLUMN);

            if (ignore_constant_values)
                src_node = &actions_dag->addColumn(res_elem);
            else if (res_const->getField() != src_const->getField())
                throw Exception("Cannot convert column " + backQuote(res_elem.name) + " because "
                                "it is constant but values of constants are different in source and result",
                                ErrorCodes::ILLEGAL_COLUMN);
        }

        /// Add CAST function to convert into result type if needed.
        if (!res_elem.type->equals(*src_node->result_type))
        {
            /// Second CAST argument: a constant string column holding the target type name.
            ColumnWithTypeAndName type_name_column;
            type_name_column.name = res_elem.type->getName();
            type_name_column.column = DataTypeString().createColumnConst(0, type_name_column.name);
            type_name_column.type = std::make_shared<DataTypeString>();

            const auto * right_arg = &actions_dag->addColumn(std::move(type_name_column));
            const auto * left_arg = src_node;

            FunctionCast::Diagnostic diagnostic = {src_node->result_name, res_elem.name};
            FunctionOverloadResolverPtr func_builder_cast =
                std::make_shared<FunctionOverloadResolverAdaptor>(
                    CastOverloadResolver<CastType::nonAccurate>::createImpl(false, std::move(diagnostic)));

            NodeRawConstPtrs cast_arguments = { left_arg, right_arg };
            src_node = &actions_dag->addFunction(func_builder_cast, std::move(cast_arguments), {});
        }

        /// Materialize a constant source when the result column is not constant.
        const bool source_is_const = src_node->column && isColumnConst(*src_node->column);
        const bool result_is_const = res_elem.column && isColumnConst(*res_elem.column);
        if (source_is_const && !result_is_const)
        {
            NodeRawConstPtrs materialize_arguments = {src_node};
            src_node = &actions_dag->addFunction(func_builder_materialize, std::move(materialize_arguments), {});
        }

        /// Rename to the expected result name.
        if (src_node->result_name != res_elem.name)
            src_node = &actions_dag->addAlias(*src_node, res_elem.name);

        projection[result_col_num] = src_node;
    }

    actions_dag->index.swap(projection);
    actions_dag->removeUnusedActions();
    actions_dag->projectInput();

    return actions_dag;
}
|
|
|
|
|
2021-02-04 14:25:11 +00:00
|
|
|
/// Builds a DAG without inputs whose single output is the given column wrapped into
/// `materialize` and aliased back to the column's original name.
ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column)
{
    auto dag = std::make_shared<ActionsDAG>();

    FunctionOverloadResolverPtr func_builder_materialize =
        std::make_shared<FunctionOverloadResolverAdaptor>(
            std::make_unique<DefaultOverloadResolver>(
                std::make_shared<FunctionMaterialize>()));

    /// Remember the name before `column` is moved into the DAG.
    std::string output_name = column.name;

    const auto * column_node = &dag->addColumn(std::move(column));

    NodeRawConstPtrs materialize_arguments = {column_node};
    const auto & materialized = dag->addFunction(func_builder_materialize, std::move(materialize_arguments), {});
    const auto & output = dag->addAlias(materialized, std::move(output_name));

    dag->index.push_back(&output);

    return dag;
}
|
|
|
|
|
2020-12-01 11:19:03 +00:00
|
|
|
/// Chain two DAGs: results of `first` are used as inputs of `second`.
///
/// first: x (1), x (2), y ==> x (2), z, x (3)
/// second: x (1), x (2), x (3) ==> x (3), x (2), x (1)
/// merge: x (1), x (2), x (3), y =(first)=> x (2), z, x (4), x (3) =(second)=> x (3), x (4), x (2), z
///
/// Both arguments are consumed. The merged DAG is assembled inside `first` and returned.
/// Throws LOGICAL_ERROR if `second` needs a column which is not in `first` index while `first.project_input` is set.
ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
{
    /// Nodes of `first` index which are consumed as inputs of `second`, with the number of
    /// occurrences to drop from the index (one node may be repeated several times in result).
    std::unordered_map<const Node *, size_t> consumed_results;
    /// Input node of `second` -> node of `first` which provides it.
    std::unordered_map<const Node *, const Node *> input_providers;

    /// Match inputs of `second` with results of `first`; unmatched ones become inputs of `first`.
    {
        /// Index may have multiple columns with same name. Candidates are taken in index order.
        std::unordered_map<std::string_view, std::list<const Node *>> results_by_name;
        for (const auto * result : first.index)
            results_by_name[result->result_name].push_back(result);

        for (const auto * input : second.inputs)
        {
            auto found = results_by_name.find(input->result_name);
            const bool has_provider = found != results_by_name.end() && !found->second.empty();

            if (has_provider)
            {
                auto & candidates = found->second;
                const Node * provider = candidates.front();
                candidates.pop_front();

                input_providers[input] = provider;
                ++consumed_results[provider];
                continue;
            }

            if (first.project_input)
                throw Exception(ErrorCodes::LOGICAL_ERROR,
                                "Cannot find column {} in ActionsDAG result", input->result_name);

            first.inputs.push_back(input);
        }
    }

    /// Returns the node of `first` for a matched INPUT of `second`, or the node itself otherwise.
    const auto resolve = [&input_providers](const Node * node) -> const Node *
    {
        if (node->type != ActionType::INPUT)
            return node;

        auto found = input_providers.find(node);
        return found == input_providers.end() ? node : found->second;
    };

    /// Replace inputs from `second` to nodes from `first` result (both in children and in index).
    for (auto & node : second.nodes)
        for (auto & child : node.children)
            child = resolve(child);

    for (auto & result : second.index)
        result = resolve(result);

    /// Update index.
    if (second.project_input)
    {
        /// Index of `second` replaces index of `first` completely.
        first.project_input = true;
    }
    else
    {
        /// Append results of `first` which were not consumed by `second`.
        for (const auto * result : first.index)
        {
            auto consumed = consumed_results.find(result);
            if (consumed != consumed_results.end() && consumed->second > 0)
                --consumed->second;
            else
                second.index.push_back(result);
        }
    }

    first.index.swap(second.index);

    first.nodes.splice(first.nodes.end(), std::move(second.nodes));

    first.projected_output = second.projected_output;

    /// Drop unused inputs and, probably, some actions.
    first.removeUnusedActions();

    return std::make_shared<ActionsDAG>(std::move(first));
}
|
|
|
|
|
2021-02-04 11:44:00 +00:00
|
|
|
/// Split DAG into two parts.
///
/// The first part contains copies of `split_nodes` and of every node they (transitively) use as a child.
/// The second part contains copies of all other nodes and has the same index as current actions.
/// Nodes of the first part which are used by the second part (as a child or as a result in index)
/// are represented in the second part by new INPUT nodes; the same nodes form the index of the first part.
///
/// Returns {first part, second part}. Current DAG is not modified.
ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const
{
    /// (first_nodes, first_index) is a part which will have split_list in result.
    /// (second_nodes, second_index) is a part which will have same index as current actions.
    Nodes second_nodes;
    Nodes first_nodes;
    NodeRawConstPtrs second_index;
    NodeRawConstPtrs first_index;

    /// List of nodes from current actions which will be inputs of the second part
    /// (and, in the same order, index of the first part).
    NodeRawConstPtrs new_inputs;

    struct Frame
    {
        const Node * node;
        size_t next_child_to_visit = 0;
    };

    struct Data
    {
        bool needed_by_split_node = false;
        bool visited = false;
        bool used_in_result = false;

        /// Copies of node in one of the DAGs.
        /// For COLUMN and INPUT both copies may exist.
        Node * to_second = nullptr;
        Node * to_first = nullptr;
    };

    std::stack<Frame> stack;
    std::unordered_map<const Node *, Data> data;

    for (const auto & node : index)
        data[node].used_in_result = true;

    /// DFS. Decide if node is needed by split.
    for (const auto & node : nodes)
    {
        if (split_nodes.count(&node) == 0)
            continue;

        auto & cur_data = data[&node];
        if (cur_data.needed_by_split_node)
            continue;

        cur_data.needed_by_split_node = true;
        stack.push({.node = &node});

        while (!stack.empty())
        {
            /// Take the pointer by value: a reference into the top frame would dangle after pop().
            const Node * cur_node = stack.top().node;
            stack.pop();

            for (const auto * child : cur_node->children)
            {
                auto & child_data = data[child];
                if (!child_data.needed_by_split_node)
                {
                    child_data.needed_by_split_node = true;
                    stack.push({.node = child});
                }
            }
        }
    }

    /// DFS. Move nodes to one of the DAGs.
    for (const auto & node : nodes)
    {
        if (!data[&node].visited)
            stack.push({.node = &node});

        while (!stack.empty())
        {
            auto & cur = stack.top();
            auto & cur_data = data[cur.node];

            /// At first, visit all children.
            while (cur.next_child_to_visit < cur.node->children.size())
            {
                const auto * child = cur.node->children[cur.next_child_to_visit];
                auto & child_data = data[child];

                if (!child_data.visited)
                {
                    stack.push({.node = child});
                    break;
                }

                ++cur.next_child_to_visit;
            }

            /// Make a copy part.
            if (cur.next_child_to_visit == cur.node->children.size())
            {
                /// Copy the pointer out of the frame: `cur` refers to the stack top and must not be used after pop().
                const Node * cur_node = cur.node;

                cur_data.visited = true;
                stack.pop();

                if (!cur_data.needed_by_split_node)
                {
                    auto & copy = second_nodes.emplace_back(*cur_node);
                    cur_data.to_second = &copy;

                    /// Replace children to newly created nodes.
                    for (auto & child : copy.children)
                    {
                        auto & child_data = data[child];

                        /// If children is not created, it may be from split part.
                        if (!child_data.to_second)
                        {
                            if (child->type == ActionType::COLUMN) /// Just create new node for COLUMN action.
                            {
                                child_data.to_second = &second_nodes.emplace_back(*child);
                            }
                            else
                            {
                                /// Node from first part is added as new input.
                                Node input_node;
                                input_node.type = ActionType::INPUT;
                                input_node.result_type = child->result_type;
                                input_node.result_name = child->result_name;
                                child_data.to_second = &second_nodes.emplace_back(std::move(input_node));

                                new_inputs.push_back(child);
                            }
                        }

                        child = child_data.to_second;
                    }

                    /// Input from second DAG should also be in the first.
                    if (copy.type == ActionType::INPUT)
                    {
                        auto & input_copy = first_nodes.emplace_back(*cur_node);
                        assert(cur_data.to_first == nullptr);
                        cur_data.to_first = &input_copy;
                        new_inputs.push_back(cur_node);
                    }
                }
                else
                {
                    auto & copy = first_nodes.emplace_back(*cur_node);
                    cur_data.to_first = &copy;

                    /// Replace children to newly created nodes.
                    for (auto & child : copy.children)
                    {
                        child = data[child].to_first;
                        assert(child != nullptr);
                    }

                    if (cur_data.used_in_result)
                    {
                        /// If this node is needed in result, add it as input.
                        /// Name and type are taken from the node being processed, which is not
                        /// necessarily the root of the current DFS (outer loop variable `node`).
                        Node input_node;
                        input_node.type = ActionType::INPUT;
                        input_node.result_type = cur_node->result_type;
                        input_node.result_name = cur_node->result_name;
                        cur_data.to_second = &second_nodes.emplace_back(std::move(input_node));

                        new_inputs.push_back(cur_node);
                    }
                }
            }
        }
    }

    for (const auto * node : index)
        second_index.push_back(data[node].to_second);

    NodeRawConstPtrs second_inputs;
    NodeRawConstPtrs first_inputs;

    /// Inputs of the first part are copies of inputs of current DAG, in the same order.
    for (const auto * input : inputs)
    {
        const auto & cur = data[input];
        first_inputs.push_back(cur.to_first);
    }

    /// Every new input of the second part is a result of the first part.
    for (const auto * input : new_inputs)
    {
        const auto & cur = data[input];
        second_inputs.push_back(cur.to_second);
        first_index.push_back(cur.to_first);
    }

    auto first_actions = std::make_shared<ActionsDAG>();
    first_actions->nodes.swap(first_nodes);
    first_actions->index.swap(first_index);
    first_actions->inputs.swap(first_inputs);

    auto second_actions = std::make_shared<ActionsDAG>();
    second_actions->nodes.swap(second_nodes);
    second_actions->index.swap(second_index);
    second_actions->inputs.swap(second_inputs);

    return {std::move(first_actions), std::move(second_actions)};
}
|
|
|
|
|
2021-02-04 11:44:00 +00:00
|
|
|
/// Split DAG so that the first part contains only nodes which do not depend on ARRAY JOIN.
/// A node depends on ARRAY JOIN if it is an INPUT whose name is in `array_joined_columns`,
/// or if any of its children does. All other nodes are passed to split() as split nodes.
/// `project_input` of the first part is reset in the result.
ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const
{
    struct Frame
    {
        const Node * node;
        size_t next_child_to_visit = 0;
    };

    /// Nodes which are known to be independent of ARRAY JOIN.
    std::unordered_set<const Node *> independent_nodes;
    std::unordered_set<const Node *> seen;

    std::stack<Frame> stack;

    /// DFS. Decide if node depends on ARRAY JOIN.
    for (const auto & root : nodes)
    {
        if (!seen.insert(&root).second)
            continue;

        stack.push({.node = &root});

        while (!stack.empty())
        {
            auto & frame = stack.top();
            const auto & children = frame.node->children;

            /// At first, visit all children. We depend on ARRAY JOIN if any child does.
            while (frame.next_child_to_visit < children.size())
            {
                const auto * child = children[frame.next_child_to_visit];

                if (seen.insert(child).second)
                {
                    stack.push({.node = child});
                    break;
                }

                ++frame.next_child_to_visit;
            }

            /// Some child was just scheduled; come back to this frame later.
            if (frame.next_child_to_visit != children.size())
                continue;

            bool depends_on_array_join =
                frame.node->type == ActionType::INPUT && array_joined_columns.count(frame.node->result_name);

            for (const auto * child : children)
            {
                if (independent_nodes.count(child) == 0)
                {
                    depends_on_array_join = true;
                    break;
                }
            }

            if (!depends_on_array_join)
                independent_nodes.insert(frame.node);

            stack.pop();
        }
    }

    auto res = split(independent_nodes);
    /// Do not remove array joined columns if they are not used.
    res.first->project_input = false;
    return res;
}
|
|
|
|
|
2021-02-04 11:44:00 +00:00
|
|
|
/// Split DAG by the node which calculates filter column `column_name`.
/// The first node in index with this result name is used as the only split node for split().
/// Throws LOGICAL_ERROR if index does not contain such column.
ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & column_name) const
{
    const Node * filter_node = nullptr;
    for (const auto * candidate : index)
    {
        if (candidate->result_name == column_name)
        {
            filter_node = candidate;
            break;
        }
    }

    if (!filter_node)
        throw Exception(ErrorCodes::LOGICAL_ERROR,
                        "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}",
                        column_name, dumpDAG());

    std::unordered_set<const Node *> split_nodes;
    split_nodes.insert(filter_node);
    return split(split_nodes);
}
|
|
|
|
|
2021-02-20 16:13:36 +00:00
|
|
|
namespace
|
2021-02-10 16:26:49 +00:00
|
|
|
{
|
|
|
|
|
2021-02-26 16:29:56 +00:00
|
|
|
struct ConjunctionNodes
|
2021-02-20 16:13:36 +00:00
|
|
|
{
|
2021-03-02 17:51:54 +00:00
|
|
|
ActionsDAG::NodeRawConstPtrs allowed;
|
|
|
|
ActionsDAG::NodeRawConstPtrs rejected;
|
2021-02-20 16:13:36 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/// Take a node which result is predicate.
|
|
|
|
/// Assuming predicate is a conjunction (probably, trivial).
|
|
|
|
/// Find separate conjunctions nodes. Split nodes into allowed and rejected sets.
|
|
|
|
/// Allowed predicate is a predicate which can be calculated using only nodes from allowed_nodes set.
|
2021-02-26 16:29:56 +00:00
|
|
|
ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordered_set<const ActionsDAG::Node *> allowed_nodes)
|
2021-02-20 16:13:36 +00:00
|
|
|
{
|
2021-02-26 16:29:56 +00:00
|
|
|
ConjunctionNodes conjunction;
|
2021-03-02 17:51:54 +00:00
|
|
|
std::unordered_set<const ActionsDAG::Node *> allowed;
|
|
|
|
std::unordered_set<const ActionsDAG::Node *> rejected;
|
2021-02-20 16:13:36 +00:00
|
|
|
|
|
|
|
struct Frame
|
2021-02-10 16:26:49 +00:00
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
const ActionsDAG::Node * node;
|
2021-02-20 16:13:36 +00:00
|
|
|
bool is_predicate = false;
|
|
|
|
size_t next_child_to_visit = 0;
|
|
|
|
size_t num_allowed_children = 0;
|
|
|
|
};
|
|
|
|
|
|
|
|
std::stack<Frame> stack;
|
2021-03-02 17:08:59 +00:00
|
|
|
std::unordered_set<const ActionsDAG::Node *> visited_nodes;
|
2021-02-20 16:13:36 +00:00
|
|
|
|
|
|
|
stack.push(Frame{.node = predicate, .is_predicate = true});
|
|
|
|
visited_nodes.insert(predicate);
|
|
|
|
while (!stack.empty())
|
|
|
|
{
|
|
|
|
auto & cur = stack.top();
|
|
|
|
bool is_conjunction = cur.is_predicate
|
|
|
|
&& cur.node->type == ActionsDAG::ActionType::FUNCTION
|
|
|
|
&& cur.node->function_base->getName() == "and";
|
|
|
|
|
|
|
|
/// At first, visit all children.
|
|
|
|
while (cur.next_child_to_visit < cur.node->children.size())
|
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
const auto * child = cur.node->children[cur.next_child_to_visit];
|
2021-02-20 16:13:36 +00:00
|
|
|
|
|
|
|
if (visited_nodes.count(child) == 0)
|
|
|
|
{
|
|
|
|
visited_nodes.insert(child);
|
|
|
|
stack.push({.node = child, .is_predicate = is_conjunction});
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (allowed_nodes.contains(child))
|
|
|
|
++cur.num_allowed_children;
|
|
|
|
++cur.next_child_to_visit;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cur.next_child_to_visit == cur.node->children.size())
|
|
|
|
{
|
|
|
|
if (cur.num_allowed_children == cur.node->children.size())
|
|
|
|
{
|
|
|
|
if (cur.node->type != ActionsDAG::ActionType::ARRAY_JOIN && cur.node->type != ActionsDAG::ActionType::INPUT)
|
|
|
|
allowed_nodes.emplace(cur.node);
|
|
|
|
}
|
|
|
|
else if (is_conjunction)
|
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
for (const auto * child : cur.node->children)
|
2021-02-20 17:42:06 +00:00
|
|
|
{
|
2021-02-20 16:13:36 +00:00
|
|
|
if (allowed_nodes.count(child))
|
2021-02-20 17:42:06 +00:00
|
|
|
{
|
|
|
|
if (allowed.insert(child).second)
|
|
|
|
conjunction.allowed.push_back(child);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
2021-02-20 16:13:36 +00:00
|
|
|
}
|
|
|
|
else if (cur.is_predicate)
|
|
|
|
{
|
2021-02-20 17:42:06 +00:00
|
|
|
if (rejected.insert(cur.node).second)
|
|
|
|
conjunction.rejected.push_back(cur.node);
|
2021-02-20 16:13:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
stack.pop();
|
|
|
|
}
|
|
|
|
}
|
2021-02-10 16:26:49 +00:00
|
|
|
|
2021-02-20 16:13:36 +00:00
|
|
|
if (conjunction.allowed.empty())
|
|
|
|
{
|
2021-02-26 16:29:56 +00:00
|
|
|
/// If nothing was added to conjunction, check if it is trivial.
|
2021-02-20 16:13:36 +00:00
|
|
|
if (allowed_nodes.count(predicate))
|
2021-02-20 17:42:06 +00:00
|
|
|
conjunction.allowed.push_back(predicate);
|
2021-02-10 16:26:49 +00:00
|
|
|
}
|
|
|
|
|
2021-02-20 16:13:36 +00:00
|
|
|
return conjunction;
|
|
|
|
}
|
2021-02-10 16:26:49 +00:00
|
|
|
|
2021-03-02 17:51:54 +00:00
|
|
|
ColumnsWithTypeAndName prepareFunctionArguments(const ActionsDAG::NodeRawConstPtrs & nodes)
|
2021-02-20 16:13:36 +00:00
|
|
|
{
|
|
|
|
ColumnsWithTypeAndName arguments;
|
|
|
|
arguments.reserve(nodes.size());
|
2021-02-10 16:26:49 +00:00
|
|
|
|
2021-02-20 16:13:36 +00:00
|
|
|
for (const auto * child : nodes)
|
|
|
|
{
|
|
|
|
ColumnWithTypeAndName argument;
|
|
|
|
argument.column = child->column;
|
|
|
|
argument.type = child->result_type;
|
|
|
|
argument.name = child->result_name;
|
2021-02-10 16:26:49 +00:00
|
|
|
|
2021-02-20 16:13:36 +00:00
|
|
|
arguments.emplace_back(std::move(argument));
|
|
|
|
}
|
|
|
|
|
|
|
|
return arguments;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create actions which calculate conjunction of selected nodes.
|
|
|
|
/// Assume conjunction nodes are predicates (and may be used as arguments of function AND).
|
|
|
|
///
|
|
|
|
/// Result actions add single column with conjunction result (it is always last in index).
|
|
|
|
/// No other columns are added or removed.
|
2021-03-02 17:08:59 +00:00
|
|
|
ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunction)
|
2021-02-20 16:13:36 +00:00
|
|
|
{
|
|
|
|
if (conjunction.empty())
|
|
|
|
return nullptr;
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
auto actions = std::make_shared<ActionsDAG>();
|
2021-02-20 16:13:36 +00:00
|
|
|
|
|
|
|
FunctionOverloadResolverPtr func_builder_and =
|
|
|
|
std::make_shared<FunctionOverloadResolverAdaptor>(
|
|
|
|
std::make_unique<DefaultOverloadResolver>(
|
|
|
|
std::make_shared<FunctionAnd>()));
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> nodes_mapping;
|
2021-02-20 16:13:36 +00:00
|
|
|
|
|
|
|
struct Frame
|
2021-02-10 16:26:49 +00:00
|
|
|
{
|
2021-02-20 16:13:36 +00:00
|
|
|
const ActionsDAG::Node * node;
|
|
|
|
size_t next_child_to_visit = 0;
|
|
|
|
};
|
2021-02-10 16:26:49 +00:00
|
|
|
|
2021-02-20 16:13:36 +00:00
|
|
|
std::stack<Frame> stack;
|
2021-02-10 16:26:49 +00:00
|
|
|
|
2021-02-20 16:13:36 +00:00
|
|
|
/// DFS. Clone actions.
|
|
|
|
for (const auto * predicate : conjunction)
|
|
|
|
{
|
|
|
|
if (nodes_mapping.count(predicate))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
stack.push({.node = predicate});
|
2021-02-10 16:26:49 +00:00
|
|
|
while (!stack.empty())
|
|
|
|
{
|
|
|
|
auto & cur = stack.top();
|
|
|
|
/// At first, visit all children.
|
|
|
|
while (cur.next_child_to_visit < cur.node->children.size())
|
|
|
|
{
|
2021-03-02 17:08:59 +00:00
|
|
|
const auto * child = cur.node->children[cur.next_child_to_visit];
|
2021-02-10 16:26:49 +00:00
|
|
|
|
2021-02-20 16:13:36 +00:00
|
|
|
if (nodes_mapping.count(child) == 0)
|
2021-02-10 16:26:49 +00:00
|
|
|
{
|
2021-02-20 16:13:36 +00:00
|
|
|
stack.push({.node = child});
|
2021-02-10 16:26:49 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
++cur.next_child_to_visit;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cur.next_child_to_visit == cur.node->children.size())
|
|
|
|
{
|
2021-02-20 16:13:36 +00:00
|
|
|
auto & node = actions->nodes.emplace_back(*cur.node);
|
|
|
|
nodes_mapping[cur.node] = &node;
|
|
|
|
|
|
|
|
for (auto & child : node.children)
|
|
|
|
child = nodes_mapping[child];
|
|
|
|
|
|
|
|
if (node.type == ActionType::INPUT)
|
2021-02-17 10:27:47 +00:00
|
|
|
{
|
2021-02-20 16:13:36 +00:00
|
|
|
actions->inputs.emplace_back(&node);
|
2021-03-02 17:08:59 +00:00
|
|
|
actions->index.push_back(&node);
|
2021-02-10 16:26:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
stack.pop();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-02 17:08:59 +00:00
|
|
|
const Node * result_predicate = nodes_mapping[*conjunction.begin()];
|
2021-02-20 16:13:36 +00:00
|
|
|
|
|
|
|
if (conjunction.size() > 1)
|
2021-02-10 16:26:49 +00:00
|
|
|
{
|
2021-03-02 17:51:54 +00:00
|
|
|
NodeRawConstPtrs args;
|
2021-02-20 16:13:36 +00:00
|
|
|
args.reserve(conjunction.size());
|
|
|
|
for (const auto * predicate : conjunction)
|
|
|
|
args.emplace_back(nodes_mapping[predicate]);
|
|
|
|
|
2021-03-02 17:51:54 +00:00
|
|
|
result_predicate = &actions->addFunction(func_builder_and, std::move(args), {});
|
2021-02-10 16:26:49 +00:00
|
|
|
}
|
|
|
|
|
2021-03-02 17:51:54 +00:00
|
|
|
actions->index.push_back(result_predicate);
|
2021-02-20 16:13:36 +00:00
|
|
|
return actions;
|
|
|
|
}
|
2021-02-17 10:27:47 +00:00
|
|
|
|
2021-02-20 16:13:36 +00:00
|
|
|
/// Extract the part of filter `filter_name` which can be calculated using only `available_inputs`.
///
/// The filter is considered a conjunction. Conjuncts which depend only on available inputs are cloned
/// into a new DAG (see cloneActionsForConjunction), which is returned. Current DAG is updated:
///  * if nothing is left of the filter, its column is removed from index (when `can_remove_filter`)
///    or replaced with constant 1;
///  * otherwise the filter node is rebuilt from the remaining conjuncts, keeping its result name and type.
///
/// Returns nullptr (and leaves current DAG untouched) if no conjunct can be extracted.
/// Throws LOGICAL_ERROR if index does not contain column `filter_name`.
ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name, bool can_remove_filter, const Names & available_inputs)
{
    /// The filter node is modified in place below, hence non-const pointer.
    Node * predicate = nullptr;
    for (const auto * candidate : index)
    {
        if (candidate->result_name == filter_name)
        {
            predicate = const_cast<Node *>(candidate);
            break;
        }
    }

    if (!predicate)
        throw Exception(ErrorCodes::LOGICAL_ERROR,
                        "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}",
                        filter_name, dumpDAG());

    std::unordered_set<const Node *> allowed_nodes;

    /// Get input nodes from available_inputs names.
    /// Inputs with the same name are matched one by one, in order of appearance.
    {
        std::unordered_map<std::string_view, std::list<const Node *>> inputs_by_name;
        for (const auto * input : inputs)
            inputs_by_name[input->result_name].emplace_back(input);

        for (const auto & name : available_inputs)
        {
            auto & same_name_inputs = inputs_by_name[name];
            if (same_name_inputs.empty())
                continue;

            allowed_nodes.emplace(same_name_inputs.front());
            same_name_inputs.pop_front();
        }
    }

    auto conjunction = getConjunctionNodes(predicate, allowed_nodes);
    auto actions = cloneActionsForConjunction(conjunction.allowed);
    if (!actions)
        return nullptr;

    /// Now, when actions are created, update current DAG.

    if (conjunction.rejected.empty())
    {
        /// The whole predicate was split.
        if (can_remove_filter)
        {
            /// If filter column is not needed, remove it from index (first occurrence only).
            for (auto pos = index.begin(); pos != index.end(); ++pos)
            {
                if (*pos == predicate)
                {
                    index.erase(pos);
                    break;
                }
            }
        }
        else
        {
            /// Replace predicate result to constant 1.
            Node constant;
            constant.type = ActionType::COLUMN;
            constant.result_name = std::move(predicate->result_name);
            constant.result_type = std::move(predicate->result_type);
            constant.column = constant.result_type->createColumnConst(0, 1);
            *predicate = std::move(constant);
        }
    }
    else
    {
        /// Predicate is conjunction, where both allowed and rejected sets are not empty.
        /// Replace this node to conjunction of rejected predicates.

        NodeRawConstPtrs remaining = std::move(conjunction.rejected);

        if (remaining.size() != 1)
        {
            /// Predicate is function AND, which still have more then one argument.
            /// Just update children and rebuild it.
            predicate->children.swap(remaining);
            auto arguments = prepareFunctionArguments(predicate->children);

            predicate->function_base = predicate->function_builder->build(arguments);
            predicate->function = predicate->function_base->prepare(arguments);
        }
        else if (remaining.front()->result_type->equals(*predicate->result_type))
        {
            /// Rejected set has only one predicate and its type is the same. Just add alias.
            Node alias;
            alias.type = ActionType::ALIAS;
            alias.result_name = predicate->result_name;
            alias.result_type = predicate->result_type;
            alias.children.swap(remaining);
            *predicate = std::move(alias);
        }
        else
        {
            /// Rejected set has only one predicate, but type is different. Cast column.
            /// This case is possible, cause AND can use any numeric type as argument.

            /// The second argument of cast: constant string with the name of the target type.
            Node type_name;
            type_name.type = ActionType::COLUMN;
            type_name.result_name = predicate->result_type->getName();
            type_name.column = DataTypeString().createColumnConst(0, type_name.result_name);
            type_name.result_type = std::make_shared<DataTypeString>();

            const auto * right_arg = &nodes.emplace_back(std::move(type_name));
            const auto * left_arg = remaining.front();

            predicate->children = {left_arg, right_arg};
            auto arguments = prepareFunctionArguments(predicate->children);

            FunctionOverloadResolverPtr func_builder_cast =
                    std::make_shared<FunctionOverloadResolverAdaptor>(
                            CastOverloadResolver<CastType::nonAccurate>::createImpl(false));

            predicate->function_builder = func_builder_cast;
            predicate->function_base = predicate->function_builder->build(arguments);
            predicate->function = predicate->function_base->prepare(arguments);
        }
    }

    /// Some nodes may be not needed anymore after the filter was changed.
    removeUnusedActions(false);

    return actions;
}
|
|
|
|
|
2020-11-16 14:57:56 +00:00
|
|
|
}
|