Optimize predicate with toYear converter

The date converters, such as toYear, are widely used in the where
clauses of the SQL queries, however, these conversions are often
expensive due to the complexity of the calendar system.

The function preimage is found an optimization for the predicates
with the converters. Given a predicate, toYear(c) = y, we could
convert it to its equivalent form: c >= b AND c <= e, where b is
"y-01-01" and e is "y-12-31". The similar transformation applies
to other comparisons (<>, <, >, <=, <=).

This commit implemented the above transformation at the AST level
by adding a new pass in the TreeOptimizer and a new AST visitor
for in-place replacing the predicates of toYear with the converted
ones.
This commit is contained in:
Zhiguo Zhou 2023-05-19 23:58:32 +08:00
parent a9d5b68946
commit 773a5bbbaa
5 changed files with 217 additions and 0 deletions

View File

@ -0,0 +1,122 @@
#include <Interpreters/OptimizeDateFilterVisitor.h>
#include <Common/DateLUT.h>
#include <Common/DateLUTImpl.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
namespace DB
{
ASTPtr generateOptimizedDateFilterAST(const String & comparator, const String & converter, const String & column, UInt64 compare_to)
{
const DateLUTImpl & date_lut = DateLUT::instance();
if (converter != "toYear") return {};
UInt64 year = compare_to;
String from_date = date_lut.dateToString(date_lut.makeDayNum(year, 1, 1));
String to_date = date_lut.dateToString(date_lut.makeDayNum(year, 12, 31));
if (comparator == "equals")
{
return makeASTFunction("and",
makeASTFunction("greaterOrEquals",
std::make_shared<ASTIdentifier>(column),
std::make_shared<ASTLiteral>(from_date)
),
makeASTFunction("lessOrEquals",
std::make_shared<ASTIdentifier>(column),
std::make_shared<ASTLiteral>(to_date)
)
);
}
else if (comparator == "notEquals")
{
return makeASTFunction("or",
makeASTFunction("less",
std::make_shared<ASTIdentifier>(column),
std::make_shared<ASTLiteral>(from_date)
),
makeASTFunction("greater",
std::make_shared<ASTIdentifier>(column),
std::make_shared<ASTLiteral>(to_date)
)
);
}
else if (comparator == "less" || comparator == "greaterOrEquals")
{
return makeASTFunction(comparator,
std::make_shared<ASTIdentifier>(column),
std::make_shared<ASTLiteral>(from_date)
);
}
else
{
return makeASTFunction(comparator,
std::make_shared<ASTIdentifier>(column),
std::make_shared<ASTLiteral>(to_date)
);
}
}
bool rewritePredicateInPlace(ASTFunction & function, ASTPtr & ast)
{
const static std::unordered_map<String, String> swap_relations = {
{"equals", "equals"},
{"notEquals", "notEquals"},
{"less", "greater"},
{"greater", "less"},
{"lessOrEquals", "greaterOrEquals"},
{"greaterOrEquals", "lessOrEquals"},
};
if (!swap_relations.contains(function.name)) return false;
if (!function.arguments || function.arguments->children.size() != 2) return false;
size_t func_id = function.arguments->children.size();
for (size_t i = 0; i < function.arguments->children.size(); i++)
{
if (const auto * func = function.arguments->children[i]->as<ASTFunction>(); func)
{
if (func->name == "toYear")
{
func_id = i;
}
}
}
if (func_id == function.arguments->children.size()) return false;
size_t literal_id = 1 - func_id;
const auto * literal = function.arguments->children[literal_id]->as<ASTLiteral>();
if (!literal || literal->value.getType() != Field::Types::UInt64) return false;
UInt64 compare_to = literal->value.get<UInt64>();
String comparator = literal_id > func_id ? function.name : swap_relations.at(function.name);
const auto * func = function.arguments->children[func_id]->as<ASTFunction>();
const auto * column_id = func->arguments->children.at(0)->as<ASTIdentifier>();
if (!column_id) return false;
String column = column_id->name();
const auto new_ast = generateOptimizedDateFilterAST(comparator, func->name, column, compare_to);
if (!new_ast) return false;
ast = new_ast;
return true;
}
void OptimizeDateFilterInPlaceData::visit(ASTFunction & function, ASTPtr & ast) const
{
rewritePredicateInPlace(function, ast);
}
}

View File

@ -0,0 +1,20 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
class ASTFunction;
/// Rewrite the predicates in place
class OptimizeDateFilterInPlaceData
{
public:
using TypeToVisit = ASTFunction;
void visit(ASTFunction & function, ASTPtr & ast) const;
};
using OptimizeDateFilterInPlaceMatcher = OneTypeMatcher<OptimizeDateFilterInPlaceData>;
using OptimizeDateFilterInPlaceVisitor = InDepthNodeVisitor<OptimizeDateFilterInPlaceMatcher, true>;
}

View File

@ -25,6 +25,7 @@
#include <Interpreters/GatherFunctionQuantileVisitor.h>
#include <Interpreters/RewriteSumIfFunctionVisitor.h>
#include <Interpreters/RewriteArrayExistsFunctionVisitor.h>
#include <Interpreters/OptimizeDateFilterVisitor.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -677,6 +678,21 @@ void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context)
RemoveInjectiveFunctionsVisitor(data).visit(query);
}
void optimizeDateFilters(ASTSelectQuery * select_query)
{
/// Predicates in HAVING clause has been moved to WHERE clause.
if (select_query->where())
{
OptimizeDateFilterInPlaceVisitor::Data data;
OptimizeDateFilterInPlaceVisitor(data).visit(select_query->refWhere());
}
if (select_query->prewhere())
{
OptimizeDateFilterInPlaceVisitor::Data data;
OptimizeDateFilterInPlaceVisitor(data).visit(select_query->refPrewhere());
}
}
void transformIfStringsIntoEnum(ASTPtr & query)
{
std::unordered_set<String> function_names = {"if", "transform"};
@ -780,6 +796,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
tables_with_columns, result.storage_snapshot->metadata, result.storage);
}
/// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, toISOWeek, etc.
optimizeDateFilters(select_query);
/// GROUP BY injective function elimination.
optimizeGroupBy(select_query, context);

View File

@ -0,0 +1,39 @@
SELECT value1
FROM t
WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3))
SELECT value1
FROM t
WHERE ((date1 < \'1993-01-01\') OR (date1 > \'1993-12-31\')) AND ((id >= 1) AND (id <= 3))
SELECT value1
FROM t
WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3))
SELECT value1
FROM t
WHERE (date1 > \'1993-12-31\') AND ((id >= 1) AND (id <= 3))
SELECT value1
FROM t
WHERE (date1 <= \'1993-12-31\') AND ((id >= 1) AND (id <= 3))
SELECT value1
FROM t
WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3))
SELECT value1
FROM t
WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1997-12-31\')) AND ((id >= 1) AND (id <= 3))
SELECT value1
FROM t
WHERE (((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) OR ((date1 >= \'1994-01-01\') AND (date1 <= \'1994-12-31\'))) AND ((id >= 1) AND (id <= 3))
SELECT
value1,
toYear(date1) AS year1
FROM t
WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3))
SELECT value1
FROM t
WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3))
SELECT value1
FROM t
PREWHERE (date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')
WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3))
SELECT value1
FROM t
WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\'))

View File

@ -0,0 +1,17 @@
DROP TABLE IF EXISTS t;
CREATE TABLE t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id;
EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1 FROM t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM t WHERE year1 = 1993 AND id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1 FROM t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1 FROM t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3;
EXPLAIN SYNTAX SELECT value1 FROM t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993;
DROP TABLE t;