Merge pull request #22763 from CurtizJ/fix-having-push-down

Fix pushdown of having
This commit is contained in:
Nikolai Kochetov 2021-04-08 21:53:50 +03:00 committed by GitHub
commit 3426bc3906
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 57 additions and 20 deletions

View File

@ -1349,7 +1349,7 @@ ColumnsWithTypeAndName prepareFunctionArguments(const ActionsDAG::NodeRawConstPt
/// Create actions which calculate conjunction of selected nodes. /// Create actions which calculate conjunction of selected nodes.
/// Assume conjunction nodes are predicates (and may be used as arguments of function AND). /// Assume conjunction nodes are predicates (and may be used as arguments of function AND).
/// ///
/// Result actions add single column with conjunction result (it is always last in index). /// Result actions add single column with conjunction result (it is always first in index).
/// No other columns are added or removed. /// No other columns are added or removed.
ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs)
{ {
@ -1414,6 +1414,20 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunctio
} }
} }
const Node * result_predicate = nodes_mapping[*conjunction.begin()];
if (conjunction.size() > 1)
{
NodeRawConstPtrs args;
args.reserve(conjunction.size());
for (const auto * predicate : conjunction)
args.emplace_back(nodes_mapping[predicate]);
result_predicate = &actions->addFunction(func_builder_and, std::move(args), {});
}
actions->index.push_back(result_predicate);
for (const auto & col : all_inputs) for (const auto & col : all_inputs)
{ {
const Node * input; const Node * input;
@ -1430,19 +1444,6 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunctio
actions->index.push_back(input); actions->index.push_back(input);
} }
const Node * result_predicate = nodes_mapping[*conjunction.begin()];
if (conjunction.size() > 1)
{
NodeRawConstPtrs args;
args.reserve(conjunction.size());
for (const auto * predicate : conjunction)
args.emplace_back(nodes_mapping[predicate]);
result_predicate = &actions->addFunction(func_builder_and, std::move(args), {});
}
actions->index.push_back(result_predicate);
return actions; return actions;
} }
@ -1458,6 +1459,11 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
"Index for ActionsDAG does not contain filter column name {}. DAG:\n{}", "Index for ActionsDAG does not contain filter column name {}. DAG:\n{}",
filter_name, dumpDAG()); filter_name, dumpDAG());
/// If condition is constant let's do nothing.
/// It means there is nothing to push down or optimization was already applied.
if (predicate->type == ActionType::COLUMN)
return nullptr;
std::unordered_set<const Node *> allowed_nodes; std::unordered_set<const Node *> allowed_nodes;
/// Get input nodes from available_inputs names. /// Get input nodes from available_inputs names.
@ -1507,7 +1513,19 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
node.result_name = std::move(predicate->result_name); node.result_name = std::move(predicate->result_name);
node.result_type = std::move(predicate->result_type); node.result_type = std::move(predicate->result_type);
node.column = node.result_type->createColumnConst(0, 1); node.column = node.result_type->createColumnConst(0, 1);
*predicate = std::move(node);
if (predicate->type != ActionType::INPUT)
*predicate = std::move(node);
else
{
/// Special case: an input node cannot be replaced with a constant in place,
/// because that would change the list of inputs for the actions.
/// Instead, add a new constant node and point the index entries at it.
const auto * new_predicate = &addNode(node);
for (auto & index_node : index)
if (index_node == predicate)
index_node = new_predicate;
}
} }
removeUnusedActions(false); removeUnusedActions(false);

View File

@ -220,7 +220,7 @@ public:
/// Create actions which may calculate part of filter using only available_inputs. /// Create actions which may calculate part of filter using only available_inputs.
/// If nothing may be calculated, returns nullptr. /// If nothing may be calculated, returns nullptr.
/// Otherwise, return actions which inputs are from available_inputs. /// Otherwise, return actions which inputs are from available_inputs.
/// Returned actions add single column which may be used for filter. /// Returned actions add single column which may be used for filter. Added column will be the first one.
/// Also, replace some nodes of current inputs to constant 1 in case they are filtered. /// Also, replace some nodes of current inputs to constant 1 in case they are filtered.
/// ///
/// @param all_inputs should contain inputs from previous step, which will be used for result actions. /// @param all_inputs should contain inputs from previous step, which will be used for result actions.
@ -231,9 +231,9 @@ public:
/// Pushed condition: z > 0 /// Pushed condition: z > 0
/// GROUP BY step will transform columns `x, y, z` -> `sum(x), y, z` /// GROUP BY step will transform columns `x, y, z` -> `sum(x), y, z`
/// If we just add filter step with actions `z -> z > 0` before GROUP BY, /// If we just add filter step with actions `z -> z > 0` before GROUP BY,
/// columns will be transformed like `x, y, z` -> `z, z > 0, x, y` -(remove filter)-> `z, x, y`. /// columns will be transformed like `x, y, z` -> `z > 0, z, x, y` -(remove filter)-> `z, x, y`.
/// To avoid it, add inputs from `all_inputs` list, /// To avoid it, add inputs from `all_inputs` list,
/// so actions `x, y, z -> x, y, z, z > 0` -(remove filter)-> `x, y, z` will not change columns order. /// so actions `x, y, z -> z > 0, x, y, z` -(remove filter)-> `x, y, z` will not change columns order.
ActionsDAGPtr cloneActionsForFilterPushDown( ActionsDAGPtr cloneActionsForFilterPushDown(
const std::string & filter_name, const std::string & filter_name,
bool can_remove_filter, bool can_remove_filter,

View File

@ -73,8 +73,8 @@ static size_t tryAddNewFilterStep(
child_node->children.emplace_back(&node); child_node->children.emplace_back(&node);
/// Expression/Filter -> Aggregating -> Filter -> Something /// Expression/Filter -> Aggregating -> Filter -> Something
/// New filter column is added to the end. /// New filter column is the first one.
auto split_filter_column_name = (*split_filter->getIndex().rbegin())->result_name; auto split_filter_column_name = (*split_filter->getIndex().begin())->result_name;
node.step = std::make_unique<FilterStep>( node.step = std::make_unique<FilterStep>(
node.children.at(0)->step->getOutputStream(), node.children.at(0)->step->getOutputStream(),
std::move(split_filter), std::move(split_filter_column_name), true); std::move(split_filter), std::move(split_filter_column_name), true);

View File

@ -0,0 +1,2 @@
1 1 2
1 \N

View File

@ -0,0 +1,17 @@
-- Regression test for pushing a HAVING condition down below aggregation.
-- Every query filters in HAVING on the grouping key c0 and runs with
-- enable_optimize_predicate_expression disabled (presumably so that only the
-- query-plan level filter push-down is exercised -- confirm).
DROP TABLE IF EXISTS t_having;
CREATE TABLE t_having (c0 Int32, c1 UInt64) ENGINE = Memory;
-- Fill the table with 1000 rows; both columns take the same generated number.
INSERT INTO t_having SELECT number, number FROM numbers(1000);
-- The HAVING expression `c0 = 0` also appears as an aggregate function argument.
SELECT sum(c0 = 0), min(c0 + 1), sum(c0 + 2) FROM t_having
GROUP BY c0 HAVING c0 = 0
SETTINGS enable_optimize_predicate_expression=0;
-- The HAVING expression `c0 = 2` is itself one of the GROUP BY keys, next to
-- constant-NULL expressions.
SELECT c0 + -1, sum(intDivOrZero(intDivOrZero(NULL, NULL), '2'), intDivOrZero(10000000000., intDivOrZero(intDivOrZero(intDivOrZero(NULL, NULL), 10), NULL))) FROM t_having GROUP BY c0 = 2, c0 = 10, intDivOrZero(intDivOrZero(intDivOrZero(NULL, NULL), NULL), NULL), c0 HAVING c0 = 2 SETTINGS enable_optimize_predicate_expression = 0;
-- HAVING compares c0 with a value that was never inserted, and the same
-- comparison is a GROUP BY key; presumably no rows are returned -- confirm
-- against the reference output.
SELECT sum(c0 + 257) FROM t_having GROUP BY c0 = -9223372036854775808, NULL, -2147483649, c0 HAVING c0 = -9223372036854775808 SETTINGS enable_optimize_predicate_expression = 0;
-- Same shape with a fractional constant that no integer c0 value can equal.
SELECT c0 + -2, c0 + -9223372036854775807, c0 = NULL FROM t_having GROUP BY c0 = 0.9998999834060669, 1023, c0 HAVING c0 = 0.9998999834060669 SETTINGS enable_optimize_predicate_expression = 0;
DROP TABLE t_having;