From 43bf7183181ae79e7922327fe296151a404c54ac Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 6 Nov 2023 16:11:27 +0000 Subject: [PATCH 1/2] Fix crash in filterPushDown --- src/Interpreters/ActionsDAG.cpp | 35 +++++++++++-------- src/Interpreters/ActionsDAG.h | 3 ++ .../02907_filter_pushdown_crash.reference | 2 ++ .../02907_filter_pushdown_crash.sql | 18 ++++++++++ 4 files changed, 44 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/02907_filter_pushdown_crash.reference create mode 100644 tests/queries/0_stateless/02907_filter_pushdown_crash.sql diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 04dee2ed6e6..af1f2db540b 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -464,12 +464,19 @@ void ActionsDAG::removeUnusedActions(const Names & required_names, bool allow_re void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_constant_folding) { - std::unordered_set visited_nodes; std::unordered_set used_inputs; - std::stack stack; + if (!allow_remove_inputs) + { + for (const auto * input : inputs) + used_inputs.insert(input); + } + removeUnusedActions(used_inputs, allow_constant_folding); +} - for (const auto * input : inputs) - used_inputs.insert(input); +void ActionsDAG::removeUnusedActions(const std::unordered_set & used_inputs, bool allow_constant_folding) +{ + std::unordered_set visited_nodes; + std::stack stack; for (const auto * node : outputs) { @@ -488,7 +495,7 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_consta stack.push(&node); } - if (node.type == ActionType::INPUT && !allow_remove_inputs && used_inputs.contains(&node)) + if (node.type == ActionType::INPUT && used_inputs.contains(&node)) visited_nodes.insert(&node); } @@ -2187,14 +2194,6 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( { /// If filter column is not needed, remove it from output nodes. std::erase_if(outputs, [&](const Node * node) { return node == predicate; }); - - /// At the very end of this method we'll call removeUnusedActions() with allow_remove_inputs=false, - /// so we need to manually remove predicate if it is an input node. - if (predicate->type == ActionType::INPUT) - { - std::erase_if(inputs, [&](const Node * node) { return node == predicate; }); - nodes.remove_if([&](const Node & node) { return &node == predicate; }); - } } else { @@ -2261,7 +2260,15 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( } } - removeUnusedActions(false); + std::unordered_set used_inputs; + for (const auto * input : inputs) + { + if (can_remove_filter && input == predicate) + continue; + used_inputs.insert(input); + } + + removeUnusedActions(used_inputs); return actions; } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 48ed03d7347..e6dff1ffb0b 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -182,6 +182,9 @@ public: /// Remove actions that are not needed to compute output nodes void removeUnusedActions(bool allow_remove_inputs = true, bool allow_constant_folding = true); + /// Remove actions that are not needed to compute output nodes. Keep inputs from used_inputs. + void removeUnusedActions(const std::unordered_set & used_inputs, bool allow_constant_folding = true); + /// Remove actions that are not needed to compute output nodes with required names void removeUnusedActions(const Names & required_names, bool allow_remove_inputs = true, bool allow_constant_folding = true); diff --git a/tests/queries/0_stateless/02907_filter_pushdown_crash.reference b/tests/queries/0_stateless/02907_filter_pushdown_crash.reference new file mode 100644 index 00000000000..5f1d0ecea5d --- /dev/null +++ b/tests/queries/0_stateless/02907_filter_pushdown_crash.reference @@ -0,0 +1,2 @@ +2 +1 diff --git a/tests/queries/0_stateless/02907_filter_pushdown_crash.sql b/tests/queries/0_stateless/02907_filter_pushdown_crash.sql new file mode 100644 index 00000000000..6ca5500aeaf --- /dev/null +++ b/tests/queries/0_stateless/02907_filter_pushdown_crash.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (key UInt8) ENGINE = MergeTree ORDER BY key; +INSERT INTO t1 VALUES (1),(2); + +CREATE TABLE t2 (key UInt32) ENGINE = MergeTree ORDER BY key; +INSERT INTO t2 VALUES (1),(2); + +SELECT a FROM ( SELECT key + 1 as a, key FROM t1 GROUP BY key ) WHERE key; + +SET join_algorithm = 'full_sorting_merge'; +SET max_rows_in_set_to_optimize_join = 0; + +SELECT key FROM ( SELECT key FROM t1 GROUP BY key ) t1 JOIN (SELECT key FROM t2) t2 ON t1.key = t2.key WHERE key; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; From 8c083924e1628446915040e64ceed2a96f49725d Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 7 Nov 2023 10:27:39 +0000 Subject: [PATCH 2/2] Use format null in 02907_filter_pushdown_crash --- .../queries/0_stateless/02907_filter_pushdown_crash.reference | 2 -- tests/queries/0_stateless/02907_filter_pushdown_crash.sql | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02907_filter_pushdown_crash.reference b/tests/queries/0_stateless/02907_filter_pushdown_crash.reference index 5f1d0ecea5d..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02907_filter_pushdown_crash.reference +++ b/tests/queries/0_stateless/02907_filter_pushdown_crash.reference @@ -1,2 +0,0 @@ -2 -1 diff --git a/tests/queries/0_stateless/02907_filter_pushdown_crash.sql b/tests/queries/0_stateless/02907_filter_pushdown_crash.sql index 6ca5500aeaf..eb881823f27 100644 --- a/tests/queries/0_stateless/02907_filter_pushdown_crash.sql +++ b/tests/queries/0_stateless/02907_filter_pushdown_crash.sql @@ -7,12 +7,12 @@ INSERT INTO t1 VALUES (1),(2); CREATE TABLE t2 (key UInt32) ENGINE = MergeTree ORDER BY key; INSERT INTO t2 VALUES (1),(2); -SELECT a FROM ( SELECT key + 1 as a, key FROM t1 GROUP BY key ) WHERE key; +SELECT a FROM ( SELECT key + 1 as a, key FROM t1 GROUP BY key ) WHERE key FORMAT Null; SET join_algorithm = 'full_sorting_merge'; SET max_rows_in_set_to_optimize_join = 0; -SELECT key FROM ( SELECT key FROM t1 GROUP BY key ) t1 JOIN (SELECT key FROM t2) t2 ON t1.key = t2.key WHERE key; +SELECT key FROM ( SELECT key FROM t1 GROUP BY key ) t1 JOIN (SELECT key FROM t2) t2 ON t1.key = t2.key WHERE key FORMAT Null; DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2;