From 5f01a587f6c6cc2b3c5bd1711a1187b4a89d967e Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Wed, 15 Jul 2020 13:10:21 +0300 Subject: [PATCH] Optimize monotonous ORDER BY (#12467) --- src/Core/Settings.h | 1 + src/Interpreters/MonotonicityCheckVisitor.h | 142 +++++++++++++++ src/Interpreters/SyntaxAnalyzer.cpp | 45 +++++ tests/performance/monotonous_order_by.xml | 9 + ...monotonous_functions_in_order_by.reference | 168 ++++++++++++++++++ ...01321_monotonous_functions_in_order_by.sql | 59 ++++++ ...rder_by_with_different_variables.reference | 32 ++++ ...nous_order_by_with_different_variables.sql | 21 +++ 8 files changed, 477 insertions(+) create mode 100644 src/Interpreters/MonotonicityCheckVisitor.h create mode 100644 tests/performance/monotonous_order_by.xml create mode 100644 tests/queries/0_stateless/01321_monotonous_functions_in_order_by.reference create mode 100644 tests/queries/0_stateless/01321_monotonous_functions_in_order_by.sql create mode 100644 tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference create mode 100644 tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 21650b0c6fa..dbae8bdb978 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -372,6 +372,7 @@ struct Settings : public SettingsCollection M(SettingBool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \ M(SettingBool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \ M(SettingBool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. 
Currently it's not beneficial for numeric types.", 0) \ + M(SettingBool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \ M(SettingBool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \ M(SettingBool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ \ diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h new file mode 100644 index 00000000000..d813f9618e1 --- /dev/null +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -0,0 +1,142 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +using Monotonicity = IFunctionBase::Monotonicity; + +/// Checks from bottom to top if function composition is monotonous +class MonotonicityCheckMatcher +{ +public: + struct Data + { + const TablesWithColumns & tables; + const Context & context; + const std::unordered_set & group_by_function_hashes; + Monotonicity monotonicity{true, true, true}; + ASTIdentifier * identifier = nullptr; + DataTypePtr arg_data_type = {}; + + void reject() { monotonicity.is_monotonic = false; } + bool isRejected() const { return !monotonicity.is_monotonic; } + + bool canOptimize(const ASTFunction & ast_function) const + { + /// if GROUP BY contains the same function ORDER BY shouldn't be optimized + auto hash = ast_function.getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + if (group_by_function_hashes.count(key)) + return false; + + /// if ORDER BY contains aggregate function it shouldn't be optimized + if (AggregateFunctionFactory::instance().isAggregateFunctionName(ast_function.name)) + return false; + + return true; + } + + bool 
extractIdentifierAndType(const ASTFunction & ast_function) + { + if (identifier) + return true; + + identifier = ast_function.arguments->children[0]->as(); + if (!identifier) + return false; + + auto pos = IdentifierSemantic::getMembership(*identifier); + if (!pos) + pos = IdentifierSemantic::chooseTableColumnMatch(*identifier, tables, true); + if (!pos) + return false; + + if (auto data_type_and_name = tables[*pos].columns.tryGetByName(identifier->shortName())) + { + arg_data_type = data_type_and_name->type; + return true; + } + + return false; + } + }; + + static void visit(const ASTPtr & ast, Data & data) + { + if (const auto * ast_function = ast->as()) + visit(*ast_function, data); + } + + static void visit(const ASTFunction & ast_function, Data & data) + { + if (data.isRejected()) + return; + + /// TODO: monotonicity for functions of several arguments + auto arguments = ast_function.arguments; + if (arguments->children.size() != 1) + { + data.reject(); + return; + } + + if (!data.canOptimize(ast_function)) + { + data.reject(); + return; + } + + const auto & function = FunctionFactory::instance().tryGet(ast_function.name, data.context); + if (!function) + { + data.reject(); + return; + } + + /// First time extract the most enclosed identifier and its data type + if (!data.arg_data_type && !data.extractIdentifierAndType(ast_function)) + { + data.reject(); + return; + } + + ColumnsWithTypeAndName args; + args.emplace_back(data.arg_data_type, "tmp"); + auto function_base = function->build(args); + + if (function_base && function_base->hasInformationAboutMonotonicity()) + { + bool is_positive = data.monotonicity.is_positive; + data.monotonicity = function_base->getMonotonicityForRange(*data.arg_data_type, Field(), Field()); + + if (!is_positive) + data.monotonicity.is_positive = !data.monotonicity.is_positive; + data.arg_data_type = function_base->getReturnType(); + } + else + data.reject(); + } + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) + { + 
return true; + } +}; + +using MonotonicityCheckVisitor = ConstInDepthNodeVisitor; + +} diff --git a/src/Interpreters/SyntaxAnalyzer.cpp b/src/Interpreters/SyntaxAnalyzer.cpp index 0579e0c215b..7decc000b34 100644 --- a/src/Interpreters/SyntaxAnalyzer.cpp +++ b/src/Interpreters/SyntaxAnalyzer.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -529,6 +530,46 @@ void optimizeDuplicateOrderByAndDistinct(ASTPtr & query, const Context & context DuplicateDistinctVisitor(distinct_data).visit(query); } +/// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression, +/// have a single argument and are not aggregate functions. +void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, const Context & context, + const TablesWithColumns & tables_with_columns) +{ + auto order_by = select_query->orderBy(); + if (!order_by) + return; + + std::unordered_set group_by_hashes; + if (auto group_by = select_query->groupBy()) + { + for (auto & elem : group_by->children) + { + auto hash = elem->getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + group_by_hashes.insert(key); + } + } + + for (auto & child : order_by->children) + { + auto * order_by_element = child->as(); + auto & ast_func = order_by_element->children[0]; + if (!ast_func->as()) + continue; + + MonotonicityCheckVisitor::Data data{tables_with_columns, context, group_by_hashes}; + MonotonicityCheckVisitor(data).visit(ast_func); + + if (!data.isRejected()) + { + ast_func = data.identifier->clone(); + ast_func->setAlias(""); + if (!data.monotonicity.is_positive) + order_by_element->direction *= -1; + } + } +} + /// If ORDER BY has argument x followed by f(x) transfroms it to ORDER BY x. /// Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y /// in case if f(), g(), h(), t() are deterministic (in scope of query). 
@@ -1066,6 +1107,10 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( if (settings.optimize_redundant_functions_in_order_by) optimizeRedundantFunctionsInOrderBy(select_query, context); + /// Replace monotonous functions with its argument + if (settings.optimize_monotonous_functions_in_order_by) + optimizeMonotonousFunctionsInOrderBy(select_query, context, tables_with_columns); + /// Remove duplicated elements from LIMIT BY clause. optimizeLimitBy(select_query); diff --git a/tests/performance/monotonous_order_by.xml b/tests/performance/monotonous_order_by.xml new file mode 100644 index 00000000000..1c58c4e9d0a --- /dev/null +++ b/tests/performance/monotonous_order_by.xml @@ -0,0 +1,9 @@ + + + hits_10m_single + + + SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single) ORDER BY toFloat32(toFloat64(toFloat32(toFloat64(CounterID)))) FORMAT Null + SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single) ORDER BY toFloat32(toFloat64(toFloat32(toFloat64(CounterID)))) DESC, toFloat32(toFloat64(toFloat32(toFloat64(EventDate)))) ASC FORMAT Null + + diff --git a/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.reference new file mode 100644 index 00000000000..e8e7d754ed9 --- /dev/null +++ b/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.reference @@ -0,0 +1,168 @@ +0 +1 +2 +0 +1 +2 +0 +1 +2 +2 +1 +0 +0 +1 +2 +0 +1 +2 +0 +1 +2 +2 +1 +0 +2 +1 +0 +2 +1 +0 +0 +1 +2 +2 +1 +0 +2 +1 +0 +SELECT number +FROM numbers(3) +ORDER BY number ASC +SELECT number +FROM numbers(3) +ORDER BY abs(toFloat32(number)) ASC +SELECT number +FROM numbers(3) +ORDER BY toFloat32(abs(number)) ASC +SELECT number +FROM numbers(3) +ORDER BY number DESC +SELECT number +FROM numbers(3) +ORDER BY exp(number) ASC +SELECT roundToExp2(number) AS x +FROM numbers(3) +ORDER BY + number ASC, + number ASC +SELECT number AS x +FROM numbers(3) +ORDER BY + number ASC, + number ASC +SELECT 
number +FROM numbers(3) +ORDER BY number DESC +SELECT number +FROM numbers(3) +ORDER BY abs(toFloat32(number)) DESC +SELECT number +FROM numbers(3) +ORDER BY toFloat32(abs(number)) DESC +SELECT number +FROM numbers(3) +ORDER BY number ASC +SELECT number +FROM numbers(3) +ORDER BY exp(number) DESC +SELECT roundToExp2(number) AS x +FROM numbers(3) +ORDER BY + number DESC, + number DESC +0 +1 +2 +0 +1 +2 +0 +1 +2 +2 +1 +0 +0 +1 +2 +0 +1 +2 +0 +1 +2 +2 +1 +0 +2 +1 +0 +2 +1 +0 +0 +1 +2 +2 +1 +0 +2 +1 +0 +SELECT number +FROM numbers(3) +ORDER BY toFloat32(toFloat64(number)) ASC +SELECT number +FROM numbers(3) +ORDER BY abs(toFloat32(number)) ASC +SELECT number +FROM numbers(3) +ORDER BY toFloat32(abs(number)) ASC +SELECT number +FROM numbers(3) +ORDER BY -number ASC +SELECT number +FROM numbers(3) +ORDER BY exp(number) ASC +SELECT roundToExp2(number) AS x +FROM numbers(3) +ORDER BY + x ASC, + toFloat32(x) ASC +SELECT number AS x +FROM numbers(3) +ORDER BY + toFloat32(x) AS k ASC, + toFloat64(k) ASC +SELECT number +FROM numbers(3) +ORDER BY toFloat32(toFloat64(number)) DESC +SELECT number +FROM numbers(3) +ORDER BY abs(toFloat32(number)) DESC +SELECT number +FROM numbers(3) +ORDER BY toFloat32(abs(number)) DESC +SELECT number +FROM numbers(3) +ORDER BY -number DESC +SELECT number +FROM numbers(3) +ORDER BY exp(number) DESC +SELECT roundToExp2(number) AS x +FROM numbers(3) +ORDER BY + x DESC, + toFloat32(x) DESC diff --git a/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.sql b/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.sql new file mode 100644 index 00000000000..7f4b5881104 --- /dev/null +++ b/tests/queries/0_stateless/01321_monotonous_functions_in_order_by.sql @@ -0,0 +1,59 @@ +SET enable_debug_queries = 1; +SET optimize_monotonous_functions_in_order_by = 1; + +SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)); +SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)); +SELECT number FROM numbers(3) ORDER BY 
toFloat32(abs(number)); +SELECT number FROM numbers(3) ORDER BY -number; +SELECT number FROM numbers(3) ORDER BY exp(number); +SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x, toFloat32(x); +SELECT number AS x FROM numbers(3) ORDER BY toFloat32(x) as k, toFloat64(k); +SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)) DESC; +SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)) DESC; +SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)) DESC; +SELECT number FROM numbers(3) ORDER BY -number DESC; +SELECT number FROM numbers(3) ORDER BY exp(number) DESC; +SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x DESC, toFloat32(x) DESC; +analyze SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)); +analyze SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)); +analyze SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)); +analyze SELECT number FROM numbers(3) ORDER BY -number; +analyze SELECT number FROM numbers(3) ORDER BY exp(number); +analyze SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x, toFloat32(x); +analyze SELECT number AS x FROM numbers(3) ORDER BY toFloat32(x) as k, toFloat64(k); +analyze SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)) DESC; +analyze SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)) DESC; +analyze SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)) DESC; +analyze SELECT number FROM numbers(3) ORDER BY -number DESC; +analyze SELECT number FROM numbers(3) ORDER BY exp(number) DESC; +analyze SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x DESC, toFloat32(x) DESC; + +SET optimize_monotonous_functions_in_order_by = 0; + +SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)); +SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)); +SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)); +SELECT number FROM numbers(3) ORDER BY -number; +SELECT number FROM numbers(3) ORDER BY 
exp(number); +SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x, toFloat32(x); +SELECT number AS x FROM numbers(3) ORDER BY toFloat32(x) as k, toFloat64(k); +SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)) DESC; +SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)) DESC; +SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)) DESC; +SELECT number FROM numbers(3) ORDER BY -number DESC; +SELECT number FROM numbers(3) ORDER BY exp(number) DESC; +SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x DESC, toFloat32(x) DESC; +analyze SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)); +analyze SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)); +analyze SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)); +analyze SELECT number FROM numbers(3) ORDER BY -number; +analyze SELECT number FROM numbers(3) ORDER BY exp(number); +analyze SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x, toFloat32(x); +analyze SELECT number AS x FROM numbers(3) ORDER BY toFloat32(x) as k, toFloat64(k); +analyze SELECT number FROM numbers(3) ORDER BY toFloat32(toFloat64(number)) DESC; +analyze SELECT number FROM numbers(3) ORDER BY abs(toFloat32(number)) DESC; +analyze SELECT number FROM numbers(3) ORDER BY toFloat32(abs(number)) DESC; +analyze SELECT number FROM numbers(3) ORDER BY -number DESC; +analyze SELECT number FROM numbers(3) ORDER BY exp(number) DESC; +analyze SELECT roundToExp2(number) AS x FROM numbers(3) ORDER BY x DESC, toFloat32(x) DESC; +-- TODO: exp() should be monotonous function diff --git a/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference b/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference new file mode 100644 index 00000000000..cf2935a40bf --- /dev/null +++ b/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference @@ -0,0 +1,32 @@ +1 4 3 +1 3 3 +2 5 4 +2 2 4 +1 3 3 +1 4 3 +2 
2 4 +2 5 4 +2 +1 +2 +1 3 3 +1 4 3 +2 5 4 +2 2 4 +2 +1 4 3 +1 3 3 +2 5 4 +2 2 4 +1 3 3 +1 4 3 +2 2 4 +2 5 4 +2 +1 +2 +1 3 3 +1 4 3 +2 5 4 +2 2 4 +2 diff --git a/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql b/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql new file mode 100644 index 00000000000..6fda42cbed1 --- /dev/null +++ b/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (x Int8, y Int8, z Int8) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO test VALUES (1, 3, 3), (1, 4, 3), (2, 5, 4), (2, 2, 4); + +SET optimize_monotonous_functions_in_order_by = 1; +SELECT * FROM test ORDER BY toFloat32(x), -y, -z DESC; +SELECT * FROM test ORDER BY toFloat32(x), -(-y), -z DESC; +SELECT max(x) as k FROM test ORDER BY k; +SELECT roundToExp2(x) as k FROM test GROUP BY k ORDER BY k; +SELECT roundToExp2(x) as k, y, z FROM test WHERE k >= 1 ORDER BY k; +SELECT max(x) as k FROM test HAVING k > 0 ORDER BY k; + +SET optimize_monotonous_functions_in_order_by = 0; +SELECT * FROM test ORDER BY toFloat32(x), -y, -z DESC; +SELECT * FROM test ORDER BY toFloat32(x), -(-y), -z DESC; +SELECT max(x) as k FROM test ORDER BY k; +SELECT roundToExp2(x) as k From test GROUP BY k ORDER BY k; +SELECT roundToExp2(x) as k, y, z FROM test WHERE k >= 1 ORDER BY k; +SELECT max(x) as k FROM test HAVING k > 0 ORDER BY k; + +DROP TABLE test;