From 6d40c1ea2c2f149ab374f19e37e96f8e99c85022 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 2 May 2020 01:21:09 +0300 Subject: [PATCH 1/6] Preserve order of expressions during predicates optimization (enable_optimize_predicate_expression) Before this patch enable_optimize_predicate_expression changes the order *every* time, so: foo AND bar -> bar AND foo And this causes troubles for distributed queries, since query on shard will have different order of expressions with the initiating server. --- src/Interpreters/PredicateExpressionsOptimizer.cpp | 12 +++--------- .../00740_optimize_predicate_expression.reference | 1 + .../00740_optimize_predicate_expression.sql | 4 ++++ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/PredicateExpressionsOptimizer.cpp b/src/Interpreters/PredicateExpressionsOptimizer.cpp index d2f38b85b4b..b5d2c632135 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -49,13 +49,6 @@ static ASTs splitConjunctionPredicate(const std::initializer_list { std::vector res; - auto remove_expression_at_index = [&res] (const size_t index) - { - if (index < res.size() - 1) - std::swap(res[index], res.back()); - res.pop_back(); - }; - for (const auto & predicate : predicates) { if (!predicate) @@ -65,14 +58,15 @@ static ASTs splitConjunctionPredicate(const std::initializer_list for (size_t idx = 0; idx < res.size();) { - const auto & expression = res.at(idx); + ASTPtr expression = res.at(idx); if (const auto * function = expression->as(); function && function->name == "and") { + res.erase(res.begin() + idx); + for (auto & child : function->arguments->children) res.emplace_back(child); - remove_expression_at_index(idx); continue; } ++idx; diff --git a/tests/queries/0_stateless/00740_optimize_predicate_expression.reference b/tests/queries/0_stateless/00740_optimize_predicate_expression.reference index 6db331af725..6ec592953a9 100644 --- a/tests/queries/0_stateless/00740_optimize_predicate_expression.reference +++ b/tests/queries/0_stateless/00740_optimize_predicate_expression.reference @@ -1 +1,2 @@ nan +SELECT dummy\nFROM system.one\nWHERE (dummy > 0) AND (dummy < 0) diff --git a/tests/queries/0_stateless/00740_optimize_predicate_expression.sql b/tests/queries/0_stateless/00740_optimize_predicate_expression.sql index b016ab49ddd..3be996b265c 100644 --- a/tests/queries/0_stateless/00740_optimize_predicate_expression.sql +++ b/tests/queries/0_stateless/00740_optimize_predicate_expression.sql @@ -32,4 +32,8 @@ FROM ( WHERE user_id = 999 ) js2 USING site); +-- check order is preserved +SET enable_debug_queries = 1; +ANALYZE SELECT * FROM system.one HAVING dummy > 0 AND dummy < 0; + DROP TABLE perf; From f19c619dfe87a5254add11a76a127248601eea29 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 2 May 2020 01:32:57 +0300 Subject: [PATCH 2/6] Add test from #10613 for enable_optimize_predicate_expression --- .../0_stateless/00740_optimize_predicate_expression.sql | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/queries/0_stateless/00740_optimize_predicate_expression.sql b/tests/queries/0_stateless/00740_optimize_predicate_expression.sql index 3be996b265c..78065c9896e 100644 --- a/tests/queries/0_stateless/00740_optimize_predicate_expression.sql +++ b/tests/queries/0_stateless/00740_optimize_predicate_expression.sql @@ -36,4 +36,11 @@ FROM ( SET enable_debug_queries = 1; ANALYZE SELECT * FROM system.one HAVING dummy > 0 AND dummy < 0; +-- from #10613 +SELECT name, count() AS cnt +FROM remote('127.{1,2}', system.settings) +GROUP BY name +HAVING (max(value) > '9') AND (min(changed) = 0) +FORMAT Null; + DROP TABLE perf; From 765ed4f89b80eeb63102ae8f1bb9ad60dba9a3fe Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 2 May 2020 13:33:34 +0300 Subject: [PATCH 3/6] Fix 01056_predicate_optimizer_bugs --- .../0_stateless/01056_predicate_optimizer_bugs.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference index 019e95cb359..3fbdd87eec4 100644 --- a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference +++ b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference @@ -4,7 +4,7 @@ a 2 1 0 a 3 1 0 b 13 2 0 b 15 2 0 -SELECT \n co, \n co2, \n co3, \n num\nFROM \n(\n SELECT \n co, \n co2, \n co3, \n count() AS num\n FROM \n (\n SELECT \n 1 AS co, \n 2 AS co2, \n 3 AS co3\n )\n GROUP BY \n co, \n co2, \n co3\n WITH CUBE\n HAVING (co != 0) AND (co2 != 2)\n)\nWHERE (co != 0) AND (co2 != 2) +SELECT \n co, \n co2, \n co3, \n num\nFROM \n(\n SELECT \n co, \n co2, \n co3, \n count() AS num\n FROM \n (\n SELECT \n 1 AS co, \n 2 AS co2, \n 3 AS co3\n )\n GROUP BY \n co, \n co2, \n co3\n WITH CUBE\n HAVING (co2 != 2) AND (co != 0)\n)\nWHERE (co != 0) AND (co2 != 2) 1 0 3 1 1 0 0 1 SELECT alias AS name\nFROM \n(\n SELECT name AS alias\n FROM system.settings\n WHERE alias = \'enable_optimize_predicate_expression\'\n)\nANY INNER JOIN \n(\n SELECT name\n FROM system.settings\n) USING (name)\nWHERE name = \'enable_optimize_predicate_expression\' From 88d08f9f39854d3f4e12192f11426b251588a280 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 2 May 2020 13:34:45 +0300 Subject: [PATCH 4/6] Move tests from 00740_optimize_predicate_expression to 01056_predicate_optimizer_bugs --- .../00740_optimize_predicate_expression.reference | 1 - .../00740_optimize_predicate_expression.sql | 11 ----------- .../01056_predicate_optimizer_bugs.reference | 1 + .../0_stateless/01056_predicate_optimizer_bugs.sql | 10 ++++++++++ 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/00740_optimize_predicate_expression.reference b/tests/queries/0_stateless/00740_optimize_predicate_expression.reference index 6ec592953a9..6db331af725 100644 --- a/tests/queries/0_stateless/00740_optimize_predicate_expression.reference +++ b/tests/queries/0_stateless/00740_optimize_predicate_expression.reference @@ -1,2 +1 @@ nan -SELECT dummy\nFROM system.one\nWHERE (dummy > 0) AND (dummy < 0) diff --git a/tests/queries/0_stateless/00740_optimize_predicate_expression.sql b/tests/queries/0_stateless/00740_optimize_predicate_expression.sql index 78065c9896e..b016ab49ddd 100644 --- a/tests/queries/0_stateless/00740_optimize_predicate_expression.sql +++ b/tests/queries/0_stateless/00740_optimize_predicate_expression.sql @@ -32,15 +32,4 @@ FROM ( WHERE user_id = 999 ) js2 USING site); --- check order is preserved -SET enable_debug_queries = 1; -ANALYZE SELECT * FROM system.one HAVING dummy > 0 AND dummy < 0; - --- from #10613 -SELECT name, count() AS cnt -FROM remote('127.{1,2}', system.settings) -GROUP BY name -HAVING (max(value) > '9') AND (min(changed) = 0) -FORMAT Null; - DROP TABLE perf; diff --git a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference index 3fbdd87eec4..bd132202979 100644 --- a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference +++ b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference @@ -26,3 +26,4 @@ SELECT dummy\nFROM \n(\n SELECT dummy\n FROM system.one\n WHERE arrayMa 0 SELECT \n id, \n value, \n value_1\nFROM \n(\n SELECT \n 1 AS id, \n 2 AS value\n)\nALL INNER JOIN \n(\n SELECT \n 1 AS id, \n 3 AS value_1\n) USING (id)\nWHERE arrayMap(x -> ((x + value) + value_1), [1]) = [6] 1 2 3 +SELECT dummy\nFROM system.one\nWHERE (dummy > 0) AND (dummy < 0) diff --git a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql index db6c78e3cc6..18552a6591d 100644 --- a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql +++ b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql @@ -74,3 +74,13 @@ SELECT * FROM (SELECT * FROM system.one) WHERE arrayMap(x -> x + 1, [dummy]) = [ ANALYZE SELECT * FROM (SELECT 1 AS id, 2 AS value) INNER JOIN (SELECT 1 AS id, 3 AS value_1) USING id WHERE arrayMap(x -> x + value + value_1, [1]) = [6]; SELECT * FROM (SELECT 1 AS id, 2 AS value) INNER JOIN (SELECT 1 AS id, 3 AS value_1) USING id WHERE arrayMap(x -> x + value + value_1, [1]) = [6]; + +-- check order is preserved +ANALYZE SELECT * FROM system.one HAVING dummy > 0 AND dummy < 0; + +-- from #10613 +SELECT name, count() AS cnt +FROM remote('127.{1,2}', system.settings) +GROUP BY name +HAVING (max(value) > '9') AND (min(changed) = 0) +FORMAT Null; From 1f45c4302a4d10770e541eb768f65c15de9461ff Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 2 May 2020 14:45:01 +0300 Subject: [PATCH 5/6] Drop check for getPositionByName() (it already has it) in ExpressionActions --- src/Interpreters/ExpressionActions.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 363d4765019..c86239b71ae 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -342,11 +342,7 @@ void ExpressionAction::execute(Block & block, bool dry_run, ExtraBlockPtr & not_ { ColumnNumbers arguments(argument_names.size()); for (size_t i = 0; i < argument_names.size(); ++i) - { - if (!block.has(argument_names[i])) - throw Exception("Not found column: '" + argument_names[i] + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); arguments[i] = block.getPositionByName(argument_names[i]); - } size_t num_columns_without_result = block.columns(); block.insert({ nullptr, result_type, result_name}); From 6a9ec1396ae0b4c46aecafa4cb83fd895653d231 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 2 May 2020 14:27:25 +0300 Subject: [PATCH 6/6] Access aggregate keys by names over indexes Relax this to allow accepting aggregate keys in different order (typical use case is distributed queries, where the initiator server will do final merge). --- src/Interpreters/Aggregator.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 7cd6a8341cd..54f54b9b69e 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -127,11 +127,11 @@ Block Aggregator::getHeader(bool final) const if (final) { - for (size_t i = 0; i < params.aggregates_size; ++i) + for (const auto & aggregate : params.aggregates) { - auto & elem = res.getByPosition(params.keys_size + i); + auto & elem = res.getByName(aggregate.column_name); - elem.type = params.aggregates[i].function->getReturnType(); + elem.type = aggregate.function->getReturnType(); elem.column = elem.type->createColumn(); } } @@ -1057,7 +1057,8 @@ Block Aggregator::prepareBlockAndFill( { if (!final) { - aggregate_columns[i] = header.safeGetByPosition(i + params.keys_size).type->createColumn(); + const auto & aggregate_column_name = params.aggregates[i].column_name; + aggregate_columns[i] = header.getByName(aggregate_column_name).type->createColumn(); /// The ColumnAggregateFunction column captures the shared ownership of the arena with the aggregate function states. ColumnAggregateFunction & column_aggregate_func = assert_cast(*aggregate_columns[i]); @@ -1093,10 +1094,11 @@ Block Aggregator::prepareBlockAndFill( for (size_t i = 0; i < params.aggregates_size; ++i) { + const auto & aggregate_column_name = params.aggregates[i].column_name; if (final) - res.getByPosition(i + params.keys_size).column = std::move(final_aggregate_columns[i]); + res.getByName(aggregate_column_name).column = std::move(final_aggregate_columns[i]); else - res.getByPosition(i + params.keys_size).column = std::move(aggregate_columns[i]); + res.getByName(aggregate_column_name).column = std::move(aggregate_columns[i]); } /// Change the size of the columns-constants in the block. @@ -1821,7 +1823,10 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( key_columns[i] = block.safeGetByPosition(i).column.get(); for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_columns[i] = &typeid_cast(*block.safeGetByPosition(params.keys_size + i).column).getData(); + { + const auto & aggregate_column_name = params.aggregates[i].column_name; + aggregate_columns[i] = &typeid_cast(*block.getByName(aggregate_column_name).column).getData(); + } typename Method::State state(key_columns, key_sizes, aggregation_state_cache); @@ -1897,7 +1902,10 @@ void NO_INLINE Aggregator::mergeWithoutKeyStreamsImpl( /// Remember the columns we will work with for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_columns[i] = &typeid_cast(*block.safeGetByPosition(params.keys_size + i).column).getData(); + { + const auto & aggregate_column_name = params.aggregates[i].column_name; + aggregate_columns[i] = &typeid_cast(*block.getByName(aggregate_column_name).column).getData(); + } AggregatedDataWithoutKey & res = result.without_key; if (!res)