From 773a5bbbaae066ac83e35b0215897de949447063 Mon Sep 17 00:00:00 2001 From: Zhiguo Zhou Date: Fri, 19 May 2023 23:58:32 +0800 Subject: [PATCH] Optimize predicate with toYear converter The date converters, such as toYear, are widely used in the where clauses of the SQL queries, however, these conversions are often expensive due to the complexity of the calendar system. The function preimage is found an optimization for the predicates with the converters. Given a predicate, toYear(c) = y, we could convert it to its equivalent form: c >= b AND c <= e, where b is "y-01-01" and e is "y-12-31". The similar transformation applies to other comparisons (<>, <, >, <=, <=). This commit implemented the above transformation at the AST level by adding a new pass in the TreeOptimizer and a new AST visitor for in-place replacing the predicates of toYear with the converted ones. --- .../OptimizeDateFilterVisitor.cpp | 122 ++++++++++++++++++ src/Interpreters/OptimizeDateFilterVisitor.h | 20 +++ src/Interpreters/TreeOptimizer.cpp | 19 +++ ..._date_filter_predicate_optimizer.reference | 39 ++++++ .../02764_date_filter_predicate_optimizer.sql | 17 +++ 5 files changed, 217 insertions(+) create mode 100644 src/Interpreters/OptimizeDateFilterVisitor.cpp create mode 100644 src/Interpreters/OptimizeDateFilterVisitor.h create mode 100644 tests/queries/0_stateless/02764_date_filter_predicate_optimizer.reference create mode 100644 tests/queries/0_stateless/02764_date_filter_predicate_optimizer.sql diff --git a/src/Interpreters/OptimizeDateFilterVisitor.cpp b/src/Interpreters/OptimizeDateFilterVisitor.cpp new file mode 100644 index 00000000000..4c714751f7d --- /dev/null +++ b/src/Interpreters/OptimizeDateFilterVisitor.cpp @@ -0,0 +1,122 @@ +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +ASTPtr generateOptimizedDateFilterAST(const String & comparator, const String & converter, const String & column, UInt64 compare_to) +{ + const DateLUTImpl & date_lut = DateLUT::instance(); + + if (converter != "toYear") return {}; + + UInt64 year = compare_to; + String from_date = date_lut.dateToString(date_lut.makeDayNum(year, 1, 1)); + String to_date = date_lut.dateToString(date_lut.makeDayNum(year, 12, 31)); + + if (comparator == "equals") + { + return makeASTFunction("and", + makeASTFunction("greaterOrEquals", + std::make_shared(column), + std::make_shared(from_date) + ), + makeASTFunction("lessOrEquals", + std::make_shared(column), + std::make_shared(to_date) + ) + ); + } + else if (comparator == "notEquals") + { + return makeASTFunction("or", + makeASTFunction("less", + std::make_shared(column), + std::make_shared(from_date) + ), + makeASTFunction("greater", + std::make_shared(column), + std::make_shared(to_date) + ) + ); + } + else if (comparator == "less" || comparator == "greaterOrEquals") + { + return makeASTFunction(comparator, + std::make_shared(column), + std::make_shared(from_date) + ); + } + else + { + return makeASTFunction(comparator, + std::make_shared(column), + std::make_shared(to_date) + ); + } +} + +bool rewritePredicateInPlace(ASTFunction & function, ASTPtr & ast) +{ + const static std::unordered_map swap_relations = { + {"equals", "equals"}, + {"notEquals", "notEquals"}, + {"less", "greater"}, + {"greater", "less"}, + {"lessOrEquals", "greaterOrEquals"}, + {"greaterOrEquals", "lessOrEquals"}, + }; + + if (!swap_relations.contains(function.name)) return false; + + if (!function.arguments || function.arguments->children.size() != 2) return false; + + size_t func_id = function.arguments->children.size(); + + for (size_t i = 0; i < function.arguments->children.size(); i++) + { + if (const auto * func = function.arguments->children[i]->as(); func) + { + if (func->name == "toYear") + { + func_id = i; + } + } + } + + if (func_id == function.arguments->children.size()) return false; + + size_t literal_id = 1 - func_id; + const auto * literal = function.arguments->children[literal_id]->as(); + + if (!literal || literal->value.getType() != Field::Types::UInt64) return false; + + UInt64 compare_to = literal->value.get(); + String comparator = literal_id > func_id ? function.name : swap_relations.at(function.name); + + const auto * func = function.arguments->children[func_id]->as(); + const auto * column_id = func->arguments->children.at(0)->as(); + + if (!column_id) return false; + + String column = column_id->name(); + + const auto new_ast = generateOptimizedDateFilterAST(comparator, func->name, column, compare_to); + + if (!new_ast) return false; + + ast = new_ast; + return true; +} + +void OptimizeDateFilterInPlaceData::visit(ASTFunction & function, ASTPtr & ast) const +{ + rewritePredicateInPlace(function, ast); +} +} diff --git a/src/Interpreters/OptimizeDateFilterVisitor.h b/src/Interpreters/OptimizeDateFilterVisitor.h new file mode 100644 index 00000000000..84394372901 --- /dev/null +++ b/src/Interpreters/OptimizeDateFilterVisitor.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ + +class ASTFunction; + +/// Rewrite the predicates in place +class OptimizeDateFilterInPlaceData +{ +public: + using TypeToVisit = ASTFunction; + void visit(ASTFunction & function, ASTPtr & ast) const; +}; + +using OptimizeDateFilterInPlaceMatcher = OneTypeMatcher; +using OptimizeDateFilterInPlaceVisitor = InDepthNodeVisitor; +} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index c38b3c79026..825114b20b7 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -677,6 +678,21 @@ void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context) RemoveInjectiveFunctionsVisitor(data).visit(query); } +void optimizeDateFilters(ASTSelectQuery * select_query) +{ + /// Predicates in HAVING clause has been moved to WHERE clause. + if (select_query->where()) + { + OptimizeDateFilterInPlaceVisitor::Data data; + OptimizeDateFilterInPlaceVisitor(data).visit(select_query->refWhere()); + } + if (select_query->prewhere()) + { + OptimizeDateFilterInPlaceVisitor::Data data; + OptimizeDateFilterInPlaceVisitor(data).visit(select_query->refPrewhere()); + } +} + void transformIfStringsIntoEnum(ASTPtr & query) { std::unordered_set function_names = {"if", "transform"}; @@ -780,6 +796,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, tables_with_columns, result.storage_snapshot->metadata, result.storage); } + /// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, toISOWeek, etc. + optimizeDateFilters(select_query); + /// GROUP BY injective function elimination. optimizeGroupBy(select_query, context); diff --git a/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.reference b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.reference new file mode 100644 index 00000000000..e5c608ddc1a --- /dev/null +++ b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.reference @@ -0,0 +1,39 @@ +SELECT value1 +FROM t +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE ((date1 < \'1993-01-01\') OR (date1 > \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 > \'1993-12-31\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 <= \'1993-12-31\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1997-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) OR ((date1 >= \'1994-01-01\') AND (date1 <= \'1994-12-31\'))) AND ((id >= 1) AND (id <= 3)) +SELECT + value1, + toYear(date1) AS year1 +FROM t +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +PREWHERE (date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\') +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) diff --git a/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.sql b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.sql new file mode 100644 index 00000000000..563468d4f82 --- /dev/null +++ b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM t WHERE year1 = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993; + +DROP TABLE t;