Fix removing filter column from expression after Filter actions split

This commit is contained in:
Nikolai Kochetov 2021-02-04 14:44:00 +03:00
parent fd5adf1ff6
commit 7a2279d067
3 changed files with 40 additions and 29 deletions

View File

@ -454,36 +454,42 @@ bool ActionsDAG::tryRestoreColumn(const std::string & column_name)
return false;
}
void ActionsDAG::removeUnusedInput(const std::string & column_name)
bool ActionsDAG::removeUnusedResult(const std::string & column_name)
{
/// Find column in index and remove.
const Node * col;
{
auto it = index.begin();
for (; it != index.end(); ++it)
if ((*it)->result_name == column_name)
break;
if (it == index.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found result {} in ActionsDAG\n{}", column_name, dumpDAG());
col = *it;
index.remove(it);
}
/// Check if column is in input.
auto it = inputs.begin();
for (; it != inputs.end(); ++it)
if ((*it)->result_name == column_name)
if (*it == col)
break;
if (it == inputs.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found input {} in ActionsDAG\n{}", column_name, dumpDAG());
return false;
auto * input = *it;
/// Check column has no dependent.
for (const auto & node : nodes)
for (const auto * child : node.children)
if (input == child)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cannot remove input {} because it has dependent nodes in ActionsDAG\n{}",
column_name, dumpDAG());
for (auto jt = index.begin(); jt != index.end(); ++jt)
{
if (*jt == input)
{
index.remove(jt);
break;
}
}
if (col == child)
return false;
/// Remove from nodes and inputs.
for (auto jt = nodes.begin(); jt != nodes.end(); ++jt)
{
if (&(*jt) == input)
if (&(*jt) == *it)
{
nodes.erase(jt);
break;
@ -491,6 +497,7 @@ void ActionsDAG::removeUnusedInput(const std::string & column_name)
}
inputs.erase(it);
return true;
}
ActionsDAGPtr ActionsDAG::clone() const
@ -844,7 +851,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
return std::make_shared<ActionsDAG>(std::move(first));
}
std::pair<ActionsDAGPtr, ActionsDAGPtr> ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const
ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const
{
/// Split DAG into two parts.
/// (first_nodes, first_index) is a part which will have split_list in result.
@ -1045,7 +1052,7 @@ std::pair<ActionsDAGPtr, ActionsDAGPtr> ActionsDAG::split(std::unordered_set<con
return {std::move(first_actions), std::move(second_actions)};
}
std::pair<ActionsDAGPtr, ActionsDAGPtr> ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const
ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const
{
struct Frame
@ -1113,7 +1120,7 @@ std::pair<ActionsDAGPtr, ActionsDAGPtr> ActionsDAG::splitActionsBeforeArrayJoin
return res;
}
std::pair<ActionsDAGPtr, ActionsDAGPtr> ActionsDAG::splitActionsForFilter(const std::string & column_name) const
ActionsDAG::SplitResult ActionsDAG::splitActionsForFilter(const std::string & column_name) const
{
auto it = index.begin();
for (; it != index.end(); ++it)

View File

@ -214,9 +214,10 @@ public:
/// If column is not in index, try to find it in nodes and insert back into index.
bool tryRestoreColumn(const std::string & column_name);
/// Find column in input. Remove it from input and index.
/// Checks that column in inputs and has not dependent nodes.
void removeUnusedInput(const std::string & column_name);
/// Find column in result. Remove it from index.
/// If column is in inputs and has no dependent nodes, remove it from inputs too.
/// Return true if column was removed from inputs.
bool removeUnusedResult(const std::string & column_name);
void projectInput() { settings.project_input = true; }
void removeUnusedActions(const Names & required_names);
@ -255,18 +256,20 @@ public:
/// Otherwise, any two actions may be combined.
static ActionsDAGPtr merge(ActionsDAG && first, ActionsDAG && second);
using SplitResult = std::pair<ActionsDAGPtr, ActionsDAGPtr>;
/// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children.
/// Execution of first then second parts on block is equivalent to execution of initial DAG.
/// First DAG and initial DAG have equal inputs, second DAG and initial DAG have equal index (outputs).
/// Second DAG inputs may contain fewer inputs than first DAG (but also include other columns).
std::pair<ActionsDAGPtr, ActionsDAGPtr> split(std::unordered_set<const Node *> split_nodes) const;
SplitResult split(std::unordered_set<const Node *> split_nodes) const;
/// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN.
std::pair<ActionsDAGPtr, ActionsDAGPtr> splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const;
SplitResult splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const;
/// Splits actions into two parts. First part has minimal size sufficient for calculation of column_name.
/// Index of initial actions must contain column_name.
std::pair<ActionsDAGPtr, ActionsDAGPtr> splitActionsForFilter(const std::string & column_name) const;
SplitResult splitActionsForFilter(const std::string & column_name) const;
private:
Node & addNode(Node node, bool can_replace = false);

View File

@ -24,8 +24,9 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
if (split.second->trivial())
return 0;
bool remove_filter = false;
if (filter_step->removesFilterColumn())
split.second->removeUnusedInput(filter_step->getFilterColumnName());
remove_filter = split.second->removeUnusedResult(filter_step->getFilterColumnName());
auto description = filter_step->getStepDescription();
@ -37,7 +38,7 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
filter_node.children.at(0)->step->getOutputStream(),
std::move(split.first),
filter_step->getFilterColumnName(),
filter_step->removesFilterColumn());
remove_filter);
node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));