Remove index by name from ActionsDAG

This commit is contained in:
Nikolai Kochetov 2021-03-02 20:08:59 +03:00
parent 15b3f379a5
commit ffaf9da2db
2 changed files with 264 additions and 321 deletions

View File

@ -32,7 +32,7 @@ namespace ErrorCodes
ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs_) ActionsDAG::ActionsDAG(const NamesAndTypesList & inputs_)
{ {
for (const auto & input : inputs_) for (const auto & input : inputs_)
addInput(input.name, input.type, true); addInput(input.name, input.type);
} }
ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_) ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
@ -41,7 +41,7 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
{ {
if (input.column && isColumnConst(*input.column)) if (input.column && isColumnConst(*input.column))
{ {
addInput(input, true); addInput(input);
/// Here we also add column. /// Here we also add column.
/// It will allow to remove input which is actually constant (after projection). /// It will allow to remove input which is actually constant (after projection).
@ -49,49 +49,49 @@ ActionsDAG::ActionsDAG(const ColumnsWithTypeAndName & inputs_)
/// without any respect to header structure. So, it is a way to drop materialized column and use /// without any respect to header structure. So, it is a way to drop materialized column and use
/// constant value from header. /// constant value from header.
/// We cannot remove such input right now cause inputs positions are important in some cases. /// We cannot remove such input right now cause inputs positions are important in some cases.
addColumn(input, true); addColumn(input);
} }
else else
addInput(input.name, input.type, true); addInput(input.name, input.type);
} }
} }
ActionsDAG::Node & ActionsDAG::addNode(Node node, bool can_replace, bool add_to_index) ActionsDAG::Node & ActionsDAG::addNode(Node node)
{ {
auto it = index.find(node.result_name); // auto it = index.find(node.result_name);
if (it != index.end() && !can_replace && add_to_index) // if (it != index.end() && !can_replace && add_to_index)
throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); // throw Exception("Column '" + node.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
auto & res = nodes.emplace_back(std::move(node)); auto & res = nodes.emplace_back(std::move(node));
if (res.type == ActionType::INPUT) if (res.type == ActionType::INPUT)
inputs.emplace_back(&res); inputs.emplace_back(&res);
if (add_to_index) // if (add_to_index)
index.replace(&res); // index.replace(&res);
return res; return res;
} }
ActionsDAG::Node & ActionsDAG::getNode(const std::string & name) // ActionsDAG::Node & ActionsDAG::getNode(const std::string & name)
{ // {
auto it = index.find(name); // auto it = index.find(name);
if (it == index.end()) // if (it == index.end())
throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); // throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
return **it; // return **it;
} // }
const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type, bool can_replace) const ActionsDAG::Node & ActionsDAG::addInput(std::string name, DataTypePtr type)
{ {
Node node; Node node;
node.type = ActionType::INPUT; node.type = ActionType::INPUT;
node.result_type = std::move(type); node.result_type = std::move(type);
node.result_name = std::move(name); node.result_name = std::move(name);
return addNode(std::move(node), can_replace); return addNode(std::move(node));
} }
const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool can_replace) const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column)
{ {
Node node; Node node;
node.type = ActionType::INPUT; node.type = ActionType::INPUT;
@ -99,10 +99,10 @@ const ActionsDAG::Node & ActionsDAG::addInput(ColumnWithTypeAndName column, bool
node.result_name = std::move(column.name); node.result_name = std::move(column.name);
node.column = std::move(column.column); node.column = std::move(column.column);
return addNode(std::move(node), can_replace); return addNode(std::move(node));
} }
const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, bool can_replace, bool materialize) const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column)
{ {
if (!column.column) if (!column.column)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name); throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add column {} because it is nullptr", column.name);
@ -113,30 +113,25 @@ const ActionsDAG::Node & ActionsDAG::addColumn(ColumnWithTypeAndName column, boo
node.result_name = std::move(column.name); node.result_name = std::move(column.name);
node.column = std::move(column.column); node.column = std::move(column.column);
auto * res = &addNode(std::move(node), can_replace, !materialize); auto * res = &addNode(std::move(node));
if (materialize) // if (materialize)
{ // {
auto & name = res->result_name; // auto & name = res->result_name;
FunctionOverloadResolverPtr func_builder_materialize = // FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>( // std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>( // std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>())); // std::make_shared<FunctionMaterialize>()));
res = &addFunction(func_builder_materialize, {res}, {}, true, false); // res = &addFunction(func_builder_materialize, {res}, {}, true, false);
res = &addAlias(*res, name, true); // res = &addAlias(*res, name, true);
} // }
return *res; return *res;
} }
const ActionsDAG::Node & ActionsDAG::addAlias(const std::string & name, std::string alias, bool can_replace) const ActionsDAG::Node & ActionsDAG::addAlias(const Node & child, std::string alias)
{
return addAlias(getNode(name), alias, can_replace);
}
ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool can_replace)
{ {
Node node; Node node;
node.type = ActionType::ALIAS; node.type = ActionType::ALIAS;
@ -145,13 +140,11 @@ ActionsDAG::Node & ActionsDAG::addAlias(Node & child, std::string alias, bool ca
node.column = child.column; node.column = child.column;
node.children.emplace_back(&child); node.children.emplace_back(&child);
return addNode(std::move(node), can_replace); return addNode(std::move(node));
} }
const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_name, std::string result_name) const ActionsDAG::Node & ActionsDAG::addArrayJoin(const Node & child, std::string result_name)
{ {
auto & child = getNode(source_name);
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(child.result_type.get()); const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(child.result_type.get());
if (!array_type) if (!array_type)
throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH);
@ -167,37 +160,8 @@ const ActionsDAG::Node & ActionsDAG::addArrayJoin(const std::string & source_nam
const ActionsDAG::Node & ActionsDAG::addFunction( const ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function, const FunctionOverloadResolverPtr & function,
const Names & argument_names, NodeRawConstPtrs children,
std::string result_name, std::string result_name)
const Context & context [[maybe_unused]],
bool can_replace)
{
const auto & all_settings = context.getSettingsRef();
settings.max_temporary_columns = all_settings.max_temporary_columns;
settings.max_temporary_non_const_columns = all_settings.max_temporary_non_const_columns;
#if USE_EMBEDDED_COMPILER
settings.compile_expressions = all_settings.compile_expressions;
settings.min_count_to_compile_expression = all_settings.min_count_to_compile_expression;
if (!compilation_cache)
compilation_cache = context.getCompiledExpressionCache();
#endif
Inputs children;
children.reserve(argument_names.size());
for (const auto & name : argument_names)
children.push_back(&getNode(name));
return addFunction(function, children, std::move(result_name), can_replace);
}
ActionsDAG::Node & ActionsDAG::addFunction(
const FunctionOverloadResolverPtr & function,
Inputs children,
std::string result_name,
bool can_replace,
bool add_to_index)
{ {
size_t num_arguments = children.size(); size_t num_arguments = children.size();
@ -229,10 +193,7 @@ ActionsDAG::Node & ActionsDAG::addFunction(
node.function = node.function_base->prepare(arguments); node.function = node.function_base->prepare(arguments);
/// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function. /// If all arguments are constants, and function is suitable to be executed in 'prepare' stage - execute function.
/// But if we compile expressions compiled version of this function maybe placed in cache, if (all_const && node.function_base->isSuitableForConstantFolding())
/// so we don't want to unfold non deterministic functions
if (all_const && node.function_base->isSuitableForConstantFolding()
&& (!settings.compile_expressions || node.function_base->isDeterministic()))
{ {
size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size(); size_t num_rows = arguments.empty() ? 0 : arguments.front().column->size();
auto col = node.function->execute(arguments, node.result_type, num_rows, true); auto col = node.function->execute(arguments, node.result_type, num_rows, true);
@ -277,7 +238,7 @@ ActionsDAG::Node & ActionsDAG::addFunction(
node.result_name = std::move(result_name); node.result_name = std::move(result_name);
return addNode(std::move(node), can_replace, add_to_index); return addNode(std::move(node));
} }
@ -333,35 +294,26 @@ std::string ActionsDAG::dumpNames() const
void ActionsDAG::removeUnusedActions(const Names & required_names) void ActionsDAG::removeUnusedActions(const Names & required_names)
{ {
std::unordered_set<Node *> nodes_set; NodeRawConstPtrs required_nodes;
std::vector<Node *> required_nodes;
required_nodes.reserve(required_names.size()); required_nodes.reserve(required_names.size());
std::unordered_map<std::string_view, std::list<const Node *>> names_map;
for (const auto * node : index)
names_map[node->result_name].push_back(node);
for (const auto & name : required_names) for (const auto & name : required_names)
{ {
auto it = index.find(name); auto & nodes_list = names_map[name];
if (it == index.end()) if (nodes_list.empty())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown column: {}, there are only columns {}", name, dumpNames()); "Unknown column: {}, there are only columns {}", name, dumpNames());
if (nodes_set.insert(*it).second) required_nodes.push_back(nodes_list.front());
required_nodes.push_back(*it); nodes_list.pop_back();
} }
removeUnusedActions(required_nodes); removeUnusedActions(required_nodes);
} index.swap(required_nodes);
void ActionsDAG::removeUnusedActions(const std::vector<Node *> & required_nodes)
{
{
Index new_index;
for (auto * node : required_nodes)
new_index.insert(node);
index.swap(new_index);
}
removeUnusedActions(); removeUnusedActions();
} }
@ -373,7 +325,7 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs)
for (auto * node : index) for (auto * node : index)
{ {
visited_nodes.insert(node); visited_nodes.insert(node);
stack.push(node); stack.push(const_cast<Node *>(node));
} }
for (auto & node : nodes) for (auto & node : nodes)
@ -421,22 +373,35 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs)
inputs.erase(it, inputs.end()); inputs.erase(it, inputs.end());
} }
void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector<Node *> & result_nodes) void ActionsDAG::addAliases(const NamesWithAliases & aliases, bool project)
{ {
std::vector<Node *> required_nodes; std::unordered_map<std::string_view, std::list<const Node *>> names_map;
for (const auto * node : index)
names_map[node->result_name].push_back(node);
NodeRawConstPtrs required_nodes;
for (const auto & item : aliases) for (const auto & item : aliases)
{ {
auto & child = getNode(item.first); auto & nodes_list = names_map[item.first];
required_nodes.push_back(&child); if (nodes_list.empty())
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
"Unknown column: {}, there are only columns {}", name, dumpNames());
const auto * child = nodes_list.front();
nodes_list.pop_front();
required_nodes.push_back(child);
} }
result_nodes.reserve(aliases.size()); if (project)
index.clear();
index.reserve(index.size() + aliases.size());
for (size_t i = 0; i < aliases.size(); ++i) for (size_t i = 0; i < aliases.size(); ++i)
{ {
const auto & item = aliases[i]; const auto & item = aliases[i];
auto * child = required_nodes[i]; const auto * child = required_nodes[i];
if (!item.second.empty() && item.first != item.second) if (!item.second.empty() && item.first != item.second)
{ {
@ -447,40 +412,39 @@ void ActionsDAG::addAliases(const NamesWithAliases & aliases, std::vector<Node *
node.column = child->column; node.column = child->column;
node.children.emplace_back(child); node.children.emplace_back(child);
auto & alias = addNode(std::move(node), true); auto & alias = addNode(std::move(node));
result_nodes.push_back(&alias); index.push_back(&alias);
} }
else else if (project)
result_nodes.push_back(child); index.push_back(child);
} }
} }
void ActionsDAG::addAliases(const NamesWithAliases & aliases) void ActionsDAG::addAliases(const NamesWithAliases & aliases)
{ {
std::vector<Node *> result_nodes; addAliases(aliases, false);
addAliases(aliases, result_nodes);
} }
void ActionsDAG::project(const NamesWithAliases & projection) void ActionsDAG::project(const NamesWithAliases & projection)
{ {
std::vector<Node *> result_nodes; addAliases(projection, true);
addAliases(projection, result_nodes); removeUnusedActions();
removeUnusedActions(result_nodes);
projectInput(); projectInput();
settings.projected_output = true; projected_output = true;
} }
bool ActionsDAG::tryRestoreColumn(const std::string & column_name) bool ActionsDAG::tryRestoreColumn(const std::string & column_name)
{ {
if (index.contains(column_name)) for (const auto * node : index)
return true; if (node->result_name == column_name)
return true;
for (auto it = nodes.rbegin(); it != nodes.rend(); ++it) for (auto it = nodes.rbegin(); it != nodes.rend(); ++it)
{ {
auto & node = *it; auto & node = *it;
if (node.result_name == column_name) if (node.result_name == column_name)
{ {
index.replace(&node); index.push_back(&node);
return true; return true;
} }
} }
@ -502,7 +466,7 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found result {} in ActionsDAG\n{}", column_name, dumpDAG()); throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found result {} in ActionsDAG\n{}", column_name, dumpDAG());
col = *it; col = *it;
index.remove(it); index.erase(it);
} }
/// Check if column is in input. /// Check if column is in input.
@ -541,7 +505,9 @@ bool ActionsDAG::removeUnusedResult(const std::string & column_name)
ActionsDAGPtr ActionsDAG::clone() const ActionsDAGPtr ActionsDAG::clone() const
{ {
auto actions = cloneEmpty(); auto actions = std::make_shared<ActionsDAG>();
actions.project_input = project_input;
actions.projected_output = projected_output;
std::unordered_map<const Node *, Node *> copy_map; std::unordered_map<const Node *, Node *> copy_map;
@ -556,7 +522,7 @@ ActionsDAGPtr ActionsDAG::clone() const
child = copy_map[child]; child = copy_map[child];
for (const auto & node : index) for (const auto & node : index)
actions->index.insert(copy_map[node]); actions->index.push_back(copy_map[node]);
for (const auto & node : inputs) for (const auto & node : inputs)
actions->inputs.push_back(copy_map[node]); actions->inputs.push_back(copy_map[node]);
@ -671,17 +637,12 @@ void ActionsDAG::addMaterializingOutputActions()
std::make_unique<DefaultOverloadResolver>( std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionMaterialize>())); std::make_shared<FunctionMaterialize>()));
Index new_index; for (auto & node : index)
std::vector<Node *> index_nodes(index.begin(), index.end());
for (auto * node : index_nodes)
{ {
auto & name = node->result_name; auto & name = node->result_name;
node = &addFunction(func_builder_materialize, {node}, {}, true, false); node = &addFunction(func_builder_materialize, {node}, {});
node = &addAlias(*node, name, true); node = &addAlias(*node, name);
new_index.insert(node);
} }
index.swap(new_index);
} }
ActionsDAGPtr ActionsDAG::makeConvertingActions( ActionsDAGPtr ActionsDAG::makeConvertingActions(
@ -697,7 +658,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH);
auto actions_dag = std::make_shared<ActionsDAG>(source); auto actions_dag = std::make_shared<ActionsDAG>(source);
std::vector<Node *> projection(num_result_columns); NodeRawConstPtrs projection(num_result_columns);
FunctionOverloadResolverPtr func_builder_materialize = FunctionOverloadResolverPtr func_builder_materialize =
std::make_shared<FunctionOverloadResolverAdaptor>( std::make_shared<FunctionOverloadResolverAdaptor>(
@ -714,7 +675,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num) for (size_t result_col_num = 0; result_col_num < num_result_columns; ++result_col_num)
{ {
const auto & res_elem = result[result_col_num]; const auto & res_elem = result[result_col_num];
Node * src_node = nullptr; const Node * src_node = nullptr;
switch (mode) switch (mode)
{ {
@ -743,7 +704,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
if (const auto * src_const = typeid_cast<const ColumnConst *>(src_node->column.get())) if (const auto * src_const = typeid_cast<const ColumnConst *>(src_node->column.get()))
{ {
if (ignore_constant_values) if (ignore_constant_values)
src_node = const_cast<Node *>(&actions_dag->addColumn(res_elem, true)); src_node = &actions_dag->addColumn(res_elem);
else if (res_const->getField() != src_const->getField()) else if (res_const->getField() != src_const->getField())
throw Exception("Cannot convert column " + backQuote(res_elem.name) + " because " throw Exception("Cannot convert column " + backQuote(res_elem.name) + " because "
"it is constant but values of constants are different in source and result", "it is constant but values of constants are different in source and result",
@ -763,31 +724,32 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions(
column.column = DataTypeString().createColumnConst(0, column.name); column.column = DataTypeString().createColumnConst(0, column.name);
column.type = std::make_shared<DataTypeString>(); column.type = std::make_shared<DataTypeString>();
auto * right_arg = const_cast<Node *>(&actions_dag->addColumn(std::move(column), true)); const auto * right_arg = &actions_dag->addColumn(std::move(column));
auto * left_arg = src_node; const auto * left_arg = src_node;
FunctionCast::Diagnostic diagnostic = {src_node->result_name, res_elem.name}; FunctionCast::Diagnostic diagnostic = {src_node->result_name, res_elem.name};
FunctionOverloadResolverPtr func_builder_cast = FunctionOverloadResolverPtr func_builder_cast =
std::make_shared<FunctionOverloadResolverAdaptor>( std::make_shared<FunctionOverloadResolverAdaptor>(
CastOverloadResolver<CastType::nonAccurate>::createImpl(false, std::move(diagnostic))); CastOverloadResolver<CastType::nonAccurate>::createImpl(false, std::move(diagnostic)));
Inputs children = { left_arg, right_arg }; NodeRawConstPtrs children = { left_arg, right_arg };
src_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {}, true); src_node = &actions_dag->addFunction(func_builder_cast, std::move(children), {});
} }
if (src_node->column && isColumnConst(*src_node->column) && !(res_elem.column && isColumnConst(*res_elem.column))) if (src_node->column && isColumnConst(*src_node->column) && !(res_elem.column && isColumnConst(*res_elem.column)))
{ {
Inputs children = {src_node}; NodeRawConstPtrs children = {src_node};
src_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {}, true); src_node = &actions_dag->addFunction(func_builder_materialize, std::move(children), {});
} }
if (src_node->result_name != res_elem.name) if (src_node->result_name != res_elem.name)
src_node = &actions_dag->addAlias(*src_node, res_elem.name, true); src_node = &actions_dag->addAlias(*src_node, res_elem.name);
projection[result_col_num] = src_node; projection[result_col_num] = src_node;
} }
actions_dag->removeUnusedActions(projection); actions_dag->index.swap(projection);
actions_dag->removeUnusedActions();
actions_dag->projectInput(); actions_dag->projectInput();
return actions_dag; return actions_dag;
@ -802,11 +764,12 @@ ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column)
std::make_shared<FunctionMaterialize>())); std::make_shared<FunctionMaterialize>()));
auto column_name = column.name; auto column_name = column.name;
const auto & column_node = adding_column_action->addColumn(std::move(column)); const auto * column_node = &adding_column_action->addColumn(std::move(column));
Inputs inputs = {const_cast<Node *>(&column_node)}; NodeRawConstPtrs inputs = {column_node};
auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {}, true); auto & function_node = adding_column_action->addFunction(func_builder_materialize, std::move(inputs), {});
adding_column_action->addAlias(function_node, std::move(column_name), true); auto & alias_node = adding_column_action->addAlias(function_node, std::move(column_name));
adding_column_action.index->push_back(&alias_node);
return adding_column_action; return adding_column_action;
} }
@ -820,18 +783,18 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
/// This map contains nodes which should be removed from `first` index, cause they are used as inputs for `second`. /// This map contains nodes which should be removed from `first` index, cause they are used as inputs for `second`.
/// The second element is the number of removes (cause one node may be repeated several times in result). /// The second element is the number of removes (cause one node may be repeated several times in result).
std::unordered_map<Node *, size_t> removed_first_result; std::unordered_map<const Node *, size_t> removed_first_result;
/// Map inputs of `second` to nodes of `first`. /// Map inputs of `second` to nodes of `first`.
std::unordered_map<Node *, Node *> inputs_map; std::unordered_map<const Node *, const Node *> inputs_map;
/// Update inputs list. /// Update inputs list.
{ {
/// Index may have multiple columns with same name. They also may be used by `second`. Order is important. /// Index may have multiple columns with same name. They also may be used by `second`. Order is important.
std::unordered_map<std::string_view, std::list<Node *>> first_result; std::unordered_map<std::string_view, std::list<const Node *>> first_result;
for (auto & node : first.index) for (const auto & node : first.index)
first_result[node->result_name].push_back(node); first_result[node->result_name].push_back(node);
for (auto & node : second.inputs) for (const auto & node : second.inputs)
{ {
auto it = first_result.find(node->result_name); auto it = first_result.find(node->result_name);
if (it == first_result.end() || it->second.empty()) if (it == first_result.end() || it->second.empty())
@ -852,9 +815,9 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
} }
/// Replace inputs from `second` to nodes from `first` result. /// Replace inputs from `second` to nodes from `first` result.
for (auto & node : second.nodes) for (const auto & node : second.nodes)
{ {
for (auto & child : node.children) for (const auto & child : node.children)
{ {
if (child->type == ActionType::INPUT) if (child->type == ActionType::INPUT)
{ {
@ -865,7 +828,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
} }
} }
for (auto & node : second.index) for (const auto & node : second.index)
{ {
if (node->type == ActionType::INPUT) if (node->type == ActionType::INPUT)
{ {
@ -876,49 +839,28 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
} }
/// Update index. /// Update index.
if (second.settings.project_input) if (second.project_input)
{ {
first.index.swap(second.index); first.index.swap(second.index);
first.settings.project_input = true; first.project_input = true;
} }
else else
{ {
/// Remove `second` inputs from index. /// Add not removed result from first actions.
for (auto it = first.index.begin(); it != first.index.end();) for (const auto * node : first.index)
{ {
auto cur = it; auto it = removed_first_result.find(node);
++it; if (it != removed_first_result.end() && it->second > 0)
--it->second;
auto jt = removed_first_result.find(*cur); else
if (jt != removed_first_result.end() && jt->second > 0) second.index.push_back(node);
{
first.index.remove(cur);
--jt->second;
}
} }
for (auto it = second.index.rbegin(); it != second.index.rend(); ++it) first.index.swap(second.index);
first.index.prepend(*it);
} }
first.nodes.splice(first.nodes.end(), std::move(second.nodes)); first.nodes.splice(first.nodes.end(), std::move(second.nodes));
/// Here we rebuild index because some string_view from the first map now may point to string from second.
ActionsDAG::Index first_index;
for (auto * node : first.index)
first_index.insert(node);
first.index.swap(first_index);
#if USE_EMBEDDED_COMPILER
if (first.compilation_cache == nullptr)
first.compilation_cache = second.compilation_cache;
#endif
first.settings.max_temporary_columns = std::max(first.settings.max_temporary_columns, second.settings.max_temporary_columns);
first.settings.max_temporary_non_const_columns = std::max(first.settings.max_temporary_non_const_columns, second.settings.max_temporary_non_const_columns);
first.settings.min_count_to_compile_expression = std::max(first.settings.min_count_to_compile_expression, second.settings.min_count_to_compile_expression);
first.settings.projected_output = second.settings.projected_output; first.settings.projected_output = second.settings.projected_output;
/// Drop unused inputs and, probably, some actions. /// Drop unused inputs and, probably, some actions.
@ -932,13 +874,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
/// Split DAG into two parts. /// Split DAG into two parts.
/// (first_nodes, first_index) is a part which will have split_list in result. /// (first_nodes, first_index) is a part which will have split_list in result.
/// (second_nodes, second_index) is a part which will have same index as current actions. /// (second_nodes, second_index) is a part which will have same index as current actions.
std::list<Node> second_nodes; Nodes second_nodes;
std::list<Node> first_nodes; Nodes first_nodes;
Index second_index; NodeRawConstPtrs second_index;
Index first_index; NodeRawConstPtrs first_index;
/// List of nodes from current actions which are not inputs, but will be in second part. /// List of nodes from current actions which are not inputs, but will be in second part.
std::vector<const Node *> new_inputs; NodeRawConstPtrs new_inputs;
struct Frame struct Frame
{ {
@ -1096,13 +1038,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
} }
} }
for (auto * node : index) for (const auto * node : index)
second_index.insert(data[node].to_second); second_index.push_back(data[node].to_second);
Inputs second_inputs; NodeRawConstPtrs second_inputs;
Inputs first_inputs; NodeRawConstPtrs first_inputs;
for (auto * input : inputs) for (const auto * input : inputs)
{ {
const auto & cur = data[input]; const auto & cur = data[input];
first_inputs.push_back(cur.to_first); first_inputs.push_back(cur.to_first);
@ -1112,15 +1054,15 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
{ {
const auto & cur = data[input]; const auto & cur = data[input];
second_inputs.push_back(cur.to_second); second_inputs.push_back(cur.to_second);
first_index.insert(cur.to_first); first_index.push_back(cur.to_first);
} }
auto first_actions = cloneEmpty(); auto first_actions = std::make_shared<ActionsDAG>();
first_actions->nodes.swap(first_nodes); first_actions->nodes.swap(first_nodes);
first_actions->index.swap(first_index); first_actions->index.swap(first_index);
first_actions->inputs.swap(first_inputs); first_actions->inputs.swap(first_inputs);
auto second_actions = cloneEmpty(); auto second_actions = std::make_shared<ActionsDAG>();
second_actions->nodes.swap(second_nodes); second_actions->nodes.swap(second_nodes);
second_actions->index.swap(second_index); second_actions->index.swap(second_index);
second_actions->inputs.swap(second_inputs); second_actions->inputs.swap(second_inputs);
@ -1158,7 +1100,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet &
/// At first, visit all children. We depend on ARRAY JOIN if any child does. /// At first, visit all children. We depend on ARRAY JOIN if any child does.
while (cur.next_child_to_visit < cur.node->children.size()) while (cur.next_child_to_visit < cur.node->children.size())
{ {
auto * child = cur.node->children[cur.next_child_to_visit]; const auto * child = cur.node->children[cur.next_child_to_visit];
if (visited_nodes.count(child) == 0) if (visited_nodes.count(child) == 0)
{ {
@ -1192,7 +1134,7 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet &
auto res = split(split_nodes); auto res = split(split_nodes);
/// Do not remove array joined columns if they are not used. /// Do not remove array joined columns if they are not used.
res.first->settings.project_input = false; res.first->project_input = false;
return res; return res;
} }
@ -1217,8 +1159,8 @@ namespace
struct ConjunctionNodes struct ConjunctionNodes
{ {
std::vector<ActionsDAG::Node *> allowed; NodeRawConstPtrs allowed;
std::vector<ActionsDAG::Node *> rejected; NodeRawConstPtrs rejected;
}; };
/// Take a node which result is predicate. /// Take a node which result is predicate.
@ -1233,14 +1175,14 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
struct Frame struct Frame
{ {
ActionsDAG::Node * node; const ActionsDAG::Node * node;
bool is_predicate = false; bool is_predicate = false;
size_t next_child_to_visit = 0; size_t next_child_to_visit = 0;
size_t num_allowed_children = 0; size_t num_allowed_children = 0;
}; };
std::stack<Frame> stack; std::stack<Frame> stack;
std::unordered_set<ActionsDAG::Node *> visited_nodes; std::unordered_set<const ActionsDAG::Node *> visited_nodes;
stack.push(Frame{.node = predicate, .is_predicate = true}); stack.push(Frame{.node = predicate, .is_predicate = true});
visited_nodes.insert(predicate); visited_nodes.insert(predicate);
@ -1254,7 +1196,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
/// At first, visit all children. /// At first, visit all children.
while (cur.next_child_to_visit < cur.node->children.size()) while (cur.next_child_to_visit < cur.node->children.size())
{ {
auto * child = cur.node->children[cur.next_child_to_visit]; const auto * child = cur.node->children[cur.next_child_to_visit];
if (visited_nodes.count(child) == 0) if (visited_nodes.count(child) == 0)
{ {
@ -1277,7 +1219,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
} }
else if (is_conjunction) else if (is_conjunction)
{ {
for (auto * child : cur.node->children) for (const auto * child : cur.node->children)
{ {
if (allowed_nodes.count(child)) if (allowed_nodes.count(child))
{ {
@ -1307,7 +1249,7 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere
return conjunction; return conjunction;
} }
ColumnsWithTypeAndName prepareFunctionArguments(const std::vector<ActionsDAG::Node *> nodes) ColumnsWithTypeAndName prepareFunctionArguments(const NodeRawConstPtrs & nodes)
{ {
ColumnsWithTypeAndName arguments; ColumnsWithTypeAndName arguments;
arguments.reserve(nodes.size()); arguments.reserve(nodes.size());
@ -1332,20 +1274,19 @@ ColumnsWithTypeAndName prepareFunctionArguments(const std::vector<ActionsDAG::No
/// ///
/// Result actions add single column with conjunction result (it is always last in index). /// Result actions add single column with conjunction result (it is always last in index).
/// No other columns are added or removed. /// No other columns are added or removed.
ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunction) ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunction)
{ {
if (conjunction.empty()) if (conjunction.empty())
return nullptr; return nullptr;
auto actions = cloneEmpty(); auto actions = std::make_shared<ActionsDAG>();
actions->settings.project_input = false;
FunctionOverloadResolverPtr func_builder_and = FunctionOverloadResolverPtr func_builder_and =
std::make_shared<FunctionOverloadResolverAdaptor>( std::make_shared<FunctionOverloadResolverAdaptor>(
std::make_unique<DefaultOverloadResolver>( std::make_unique<DefaultOverloadResolver>(
std::make_shared<FunctionAnd>())); std::make_shared<FunctionAnd>()));
std::unordered_map<const ActionsDAG::Node *, ActionsDAG::Node *> nodes_mapping; std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> nodes_mapping;
struct Frame struct Frame
{ {
@ -1368,7 +1309,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunc
/// At first, visit all children. /// At first, visit all children.
while (cur.next_child_to_visit < cur.node->children.size()) while (cur.next_child_to_visit < cur.node->children.size())
{ {
auto * child = cur.node->children[cur.next_child_to_visit]; const auto * child = cur.node->children[cur.next_child_to_visit];
if (nodes_mapping.count(child) == 0) if (nodes_mapping.count(child) == 0)
{ {
@ -1390,7 +1331,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunc
if (node.type == ActionType::INPUT) if (node.type == ActionType::INPUT)
{ {
actions->inputs.emplace_back(&node); actions->inputs.emplace_back(&node);
actions->index.insert(&node); actions->index.push_back(&node);
} }
stack.pop(); stack.pop();
@ -1398,7 +1339,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunc
} }
} }
Node * result_predicate = nodes_mapping[*conjunction.begin()]; const Node * result_predicate = nodes_mapping[*conjunction.begin()];
if (conjunction.size() > 1) if (conjunction.size() > 1)
{ {
@ -1407,7 +1348,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(std::vector<Node *> conjunc
for (const auto * predicate : conjunction) for (const auto * predicate : conjunction)
args.emplace_back(nodes_mapping[predicate]); args.emplace_back(nodes_mapping[predicate]);
result_predicate = &actions->addFunction(func_builder_and, args, {}, true, false); result_predicate = &actions->addFunction(func_builder_and, args, {});
} }
actions->index.insert(result_predicate); actions->index.insert(result_predicate);
@ -1429,7 +1370,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name,
"Index for ActionsDAG does not contain filter column name {}. DAG:\n{}", "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}",
filter_name, dumpDAG()); filter_name, dumpDAG());
predicate = *it; predicate = const_cast<Node *>(*it);
} }
std::unordered_set<const Node *> allowed_nodes; std::unordered_set<const Node *> allowed_nodes;
@ -1468,7 +1409,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name,
{ {
if (*i == predicate) if (*i == predicate)
{ {
index.remove(i); index.erase(i);
break; break;
} }
} }
@ -1491,7 +1432,7 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name,
/// Predicate is conjunction, where both allowed and rejected sets are not empty. /// Predicate is conjunction, where both allowed and rejected sets are not empty.
/// Replace this node to conjunction of rejected predicates. /// Replace this node to conjunction of rejected predicates.
std::vector<Node *> new_children(conjunction.rejected.begin(), conjunction.rejected.end()); NodeRawConstPtrs new_children = std::move(conjunction.rejected);
if (new_children.size() == 1) if (new_children.size() == 1)
{ {
@ -1516,8 +1457,8 @@ ActionsDAGPtr ActionsDAG::splitActionsForFilter(const std::string & filter_name,
node.column = DataTypeString().createColumnConst(0, node.result_name); node.column = DataTypeString().createColumnConst(0, node.result_name);
node.result_type = std::make_shared<DataTypeString>(); node.result_type = std::make_shared<DataTypeString>();
auto * right_arg = &nodes.emplace_back(std::move(node)); const auto * right_arg = &nodes.emplace_back(std::move(node));
auto * left_arg = new_children.front(); const auto * left_arg = new_children.front();
predicate->children = {left_arg, right_arg}; predicate->children = {left_arg, right_arg};
auto arguments = prepareFunctionArguments(predicate->children); auto arguments = prepareFunctionArguments(predicate->children);

View File

@ -55,9 +55,13 @@ public:
FUNCTION, FUNCTION,
}; };
struct Node;
using NodeRawPtrs = std::vector<Node *>;
using NodeRawConstPtrs = std::vector<const Node *>;
struct Node struct Node
{ {
std::vector<Node *> children; NodeRawConstPtrs children;
ActionType type; ActionType type;
@ -90,94 +94,95 @@ public:
/// ///
/// Index is a list of nodes + [map: name -> list::iterator]. /// Index is a list of nodes + [map: name -> list::iterator].
/// List is ordered, may contain nodes with same names, or one node several times. /// List is ordered, may contain nodes with same names, or one node several times.
class Index // class Index
{ // {
private: // private:
std::list<Node *> list; // std::list<Node *> list;
/// Map key is a string_view to Node::result_name for node from value. // /// Map key is a string_view to Node::result_name for node from value.
/// Map always point to existing node, so key always valid (nodes live longer then index). // /// Map always point to existing node, so key always valid (nodes live longer then index).
std::unordered_map<std::string_view, std::list<Node *>::iterator> map; // std::unordered_map<std::string_view, std::list<Node *>::iterator> map;
public: // public:
auto size() const { return list.size(); } // auto size() const { return list.size(); }
bool contains(std::string_view key) const { return map.count(key) != 0; } // bool contains(std::string_view key) const { return map.count(key) != 0; }
std::list<Node *>::iterator begin() { return list.begin(); } // std::list<Node *>::iterator begin() { return list.begin(); }
std::list<Node *>::iterator end() { return list.end(); } // std::list<Node *>::iterator end() { return list.end(); }
std::list<Node *>::const_iterator begin() const { return list.begin(); } // std::list<Node *>::const_iterator begin() const { return list.begin(); }
std::list<Node *>::const_iterator end() const { return list.end(); } // std::list<Node *>::const_iterator end() const { return list.end(); }
std::list<Node *>::const_reverse_iterator rbegin() const { return list.rbegin(); } // std::list<Node *>::const_reverse_iterator rbegin() const { return list.rbegin(); }
std::list<Node *>::const_reverse_iterator rend() const { return list.rend(); } // std::list<Node *>::const_reverse_iterator rend() const { return list.rend(); }
std::list<Node *>::const_iterator find(std::string_view key) const // std::list<Node *>::const_iterator find(std::string_view key) const
{ // {
auto it = map.find(key); // auto it = map.find(key);
if (it == map.end()) // if (it == map.end())
return list.end(); // return list.end();
return it->second; // return it->second;
} // }
/// Insert method doesn't check if map already have node with the same name. // /// Insert method doesn't check if map already have node with the same name.
/// If node with the same name exists, it is removed from map, but not list. // /// If node with the same name exists, it is removed from map, but not list.
/// It is expected and used for project(), when result may have several columns with the same name. // /// It is expected and used for project(), when result may have several columns with the same name.
void insert(Node * node) { map[node->result_name] = list.emplace(list.end(), node); } // void insert(Node * node) { map[node->result_name] = list.emplace(list.end(), node); }
void prepend(Node * node) { map[node->result_name] = list.emplace(list.begin(), node); } // void prepend(Node * node) { map[node->result_name] = list.emplace(list.begin(), node); }
/// If node with same name exists in index, replace it. Otherwise insert new node to index. // /// If node with same name exists in index, replace it. Otherwise insert new node to index.
void replace(Node * node) // void replace(Node * node)
{ // {
if (auto handle = map.extract(node->result_name)) // if (auto handle = map.extract(node->result_name))
{ // {
handle.key() = node->result_name; /// Change string_view // handle.key() = node->result_name; /// Change string_view
*handle.mapped() = node; // *handle.mapped() = node;
map.insert(std::move(handle)); // map.insert(std::move(handle));
} // }
else // else
insert(node); // insert(node);
} // }
void remove(std::list<Node *>::iterator it) // void remove(std::list<Node *>::iterator it)
{ // {
auto map_it = map.find((*it)->result_name); // auto map_it = map.find((*it)->result_name);
if (map_it != map.end() && map_it->second == it) // if (map_it != map.end() && map_it->second == it)
map.erase(map_it); // map.erase(map_it);
list.erase(it); // list.erase(it);
} // }
void swap(Index & other) // void swap(Index & other)
{ // {
list.swap(other.list); // list.swap(other.list);
map.swap(other.map); // map.swap(other.map);
} // }
}; // };
/// NOTE: std::list is an implementation detail. /// NOTE: std::list is an implementation detail.
/// It allows to add and remove new nodes inplace without reallocation. /// It allows to add and remove new nodes inplace without reallocation.
/// Raw pointers to nodes remain valid. /// Raw pointers to nodes remain valid.
using Nodes = std::list<Node>; using Nodes = std::list<Node>;
using Inputs = std::vector<Node *>; //using Inputs = std::vector<Node *>;
struct ActionsSettings // struct ActionsSettings
{ // {
size_t max_temporary_columns = 0; // size_t max_temporary_columns = 0;
size_t max_temporary_non_const_columns = 0; // size_t max_temporary_non_const_columns = 0;
size_t min_count_to_compile_expression = 0; // size_t min_count_to_compile_expression = 0;
bool compile_expressions = false; // bool compile_expressions = false;
bool project_input = false; // bool project_input = false;
bool projected_output = false; // bool projected_output = false;
}; // };
private: private:
Nodes nodes; Nodes nodes;
Index index; NodeRawConstPtrs index;
Inputs inputs; NodeRawConstPtrs inputs;
ActionsSettings settings; bool project_input = false;
bool projected_output = false;
#if USE_EMBEDDED_COMPILER // #if USE_EMBEDDED_COMPILER
std::shared_ptr<CompiledExpressionCache> compilation_cache; // std::shared_ptr<CompiledExpressionCache> compilation_cache;
#endif // #endif
public: public:
ActionsDAG() = default; ActionsDAG() = default;
@ -188,8 +193,8 @@ public:
explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_); explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_);
const Nodes & getNodes() const { return nodes; } const Nodes & getNodes() const { return nodes; }
const Index & getIndex() const { return index; } const NodeRawConstPtrs & getIndex() const { return index; }
const Inputs & getInputs() const { return inputs; } const NodeRawConstPtrs & getInputs() const { return inputs; }
NamesAndTypesList getRequiredColumns() const; NamesAndTypesList getRequiredColumns() const;
ColumnsWithTypeAndName getResultColumns() const; ColumnsWithTypeAndName getResultColumns() const;
@ -199,17 +204,15 @@ public:
std::string dumpNames() const; std::string dumpNames() const;
std::string dumpDAG() const; std::string dumpDAG() const;
const Node & addInput(std::string name, DataTypePtr type, bool can_replace = false); const Node & addInput(std::string name, DataTypePtr type);
const Node & addInput(ColumnWithTypeAndName column, bool can_replace = false); const Node & addInput(ColumnWithTypeAndName column);
const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false, bool materialize = false); const Node & addColumn(ColumnWithTypeAndName column /*, bool materialize = false*/);
const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false); const Node & addAlias(const Node & child, std::string alias);
const Node & addArrayJoin(const std::string & source_name, std::string result_name); const Node & addArrayJoin(const Node & child, std::string result_name);
const Node & addFunction( const Node & addFunction(
const FunctionOverloadResolverPtr & function, const FunctionOverloadResolverPtr & function,
const Names & argument_names, NodeRawConstPtrs children,
std::string result_name, std::string result_name);
const Context & context,
bool can_replace = false);
/// Call addAlias several times. /// Call addAlias several times.
void addAliases(const NamesWithAliases & aliases); void addAliases(const NamesWithAliases & aliases);
@ -223,14 +226,14 @@ public:
/// Return true if column was removed from inputs. /// Return true if column was removed from inputs.
bool removeUnusedResult(const std::string & column_name); bool removeUnusedResult(const std::string & column_name);
void projectInput() { settings.project_input = true; } void projectInput() { project_input = true; }
void removeUnusedActions(const Names & required_names); void removeUnusedActions(const Names & required_names);
bool hasArrayJoin() const; bool hasArrayJoin() const;
bool hasStatefulFunctions() const; bool hasStatefulFunctions() const;
bool trivial() const; /// If actions has no functions or array join. bool trivial() const; /// If actions has no functions or array join.
const ActionsSettings & getSettings() const { return settings; } //const ActionsSettings & getSettings() const { return settings; }
void compileExpressions(); void compileExpressions();
@ -289,31 +292,30 @@ public:
ActionsDAGPtr splitActionsForFilter(const std::string & filter_name, bool can_remove_filter, const Names & available_inputs); ActionsDAGPtr splitActionsForFilter(const std::string & filter_name, bool can_remove_filter, const Names & available_inputs);
private: private:
Node & addNode(Node node, bool can_replace = false, bool add_to_index = true); Node & addNode(Node node);
Node & getNode(const std::string & name); // Node & getNode(const std::string & name);
Node & addAlias(Node & child, std::string alias, bool can_replace); // Node & addAlias(Node & child, std::string alias, bool can_replace);
Node & addFunction( // Node & addFunction(
const FunctionOverloadResolverPtr & function, // const FunctionOverloadResolverPtr & function,
Inputs children, // Inputs children,
std::string result_name, // std::string result_name,
bool can_replace, // bool can_replace,
bool add_to_index = true); // bool add_to_index = true);
ActionsDAGPtr cloneEmpty() const // ActionsDAGPtr cloneEmpty() const
{ // {
auto actions = std::make_shared<ActionsDAG>(); // auto actions = std::make_shared<ActionsDAG>();
actions->settings = settings; // // actions->settings = settings;
#if USE_EMBEDDED_COMPILER // // #if USE_EMBEDDED_COMPILER
actions->compilation_cache = compilation_cache; // // actions->compilation_cache = compilation_cache;
#endif // // #endif
return actions; // return actions;
} // }
void removeUnusedActions(const std::vector<Node *> & required_nodes);
void removeUnusedActions(bool allow_remove_inputs = true); void removeUnusedActions(bool allow_remove_inputs = true);
void addAliases(const NamesWithAliases & aliases, std::vector<Node *> & result_nodes); void addAliases(const NamesWithAliases & aliases, bool project);
void compileFunctions(); void compileFunctions();