In KeyCondition: Fixed execution of inversed predicates for non-strictly monotinic functional index (#9223)

* Tests for functional index

* Fixed execution of inversed predicates in functional index

When non-strictly monotonic functional index is used inverted predicated
may be executed incorrectly, which leads to multiple problems: #8821,
#9034
This commit is contained in:
Alexander Kazakov 2020-02-20 12:53:02 +03:00 committed by GitHub
parent 17383be8ca
commit 5a67c02a5d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 163 additions and 21 deletions

View File

@ -264,6 +264,78 @@ const KeyCondition::AtomMap KeyCondition::atom_map
}; };
static const std::map<std::string, std::string> inverse_relations = {
{"equals", "notEquals"},
{"notEquals", "equals"},
{"less", "greaterOrEquals"},
{"greaterOrEquals", "less"},
{"greater", "lessOrEquals"},
{"lessOrEquals", "greater"},
{"in", "notIn"},
{"notIn", "in"},
{"like", "notLike"},
{"notLike", "like"},
{"empty", "notEmpty"},
{"notEmpty", "empty"},
};
bool isLogicalOperator(const String & func_name)
{
return (func_name == "and" || func_name == "or" || func_name == "not" || func_name == "indexHint");
}
/// The node can be one of:
/// - Logical operator (AND, OR, NOT and indexHint() - logical NOOP)
/// - An "atom" (relational operator, constant, expression)
/// - A logical constant expression
/// - Any other function
ASTPtr cloneASTWithInversionPushDown(const ASTPtr node, const bool need_inversion = false)
{
const ASTFunction * func = node->as<ASTFunction>();
if (func && isLogicalOperator(func->name))
{
if (func->name == "not")
{
return cloneASTWithInversionPushDown(func->arguments->children.front(), !need_inversion);
}
const auto result_node = makeASTFunction(func->name);
/// indexHint() is a special case - logical NOOP function
if (result_node->name != "indexHint" && need_inversion)
{
result_node->name = (result_node->name == "and") ? "or" : "and";
}
if (func->arguments)
{
for (const auto & child : func->arguments->children)
{
result_node->arguments->children.push_back(cloneASTWithInversionPushDown(child, need_inversion));
}
}
return result_node;
}
const auto cloned_node = node->clone();
if (func && inverse_relations.find(func->name) != inverse_relations.cend())
{
if (need_inversion)
{
cloned_node->as<ASTFunction>()->name = inverse_relations.at(func->name);
}
return cloned_node;
}
return need_inversion ? makeASTFunction("not", cloned_node) : cloned_node;
}
inline bool Range::equals(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateEquals(), lhs, rhs); } inline bool Range::equals(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateEquals(), lhs, rhs); }
inline bool Range::less(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs); } inline bool Range::less(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs); }
@ -345,21 +417,23 @@ KeyCondition::KeyCondition(
*/ */
Block block_with_constants = getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context); Block block_with_constants = getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context);
/// Trasform WHERE section to Reverse Polish notation const ASTSelectQuery & select = query_info.query->as<ASTSelectQuery &>();
const auto & select = query_info.query->as<ASTSelectQuery &>(); if (select.where() || select.prewhere())
if (select.where())
{ {
traverseAST(select.where(), context, block_with_constants); ASTPtr filter_query;
if (select.where() && select.prewhere())
filter_query = makeASTFunction("and", select.where(), select.prewhere());
else
filter_query = select.where() ? select.where() : select.prewhere();
if (select.prewhere()) /** When non-strictly monotonic functions are employed in functional index (e.g. ORDER BY toStartOfHour(dateTime)),
{ * the use of NOT operator in predicate will result in the indexing algorithm leave out some data.
traverseAST(select.prewhere(), context, block_with_constants); * This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict
rpn.emplace_back(RPNElement::FUNCTION_AND); * when parsing the AST into internal RPN representation.
} * To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's
} * are pushed down and applied (when possible) to leaf nodes.
else if (select.prewhere()) */
{ traverseAST(cloneASTWithInversionPushDown(filter_query), context, block_with_constants);
traverseAST(select.prewhere(), context, block_with_constants);
} }
else else
{ {
@ -432,9 +506,9 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo
{ {
RPNElement element; RPNElement element;
if (auto * func = node->as<ASTFunction>()) if (const auto * func = node->as<ASTFunction>())
{ {
if (operatorFromAST(func, element)) if (tryParseLogicalOperatorFromAST(func, element))
{ {
auto & args = func->arguments->children; auto & args = func->arguments->children;
for (size_t i = 0, size = args.size(); i < size; ++i) for (size_t i = 0, size = args.size(); i < size; ++i)
@ -452,7 +526,7 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo
} }
} }
if (!atomFromAST(node, context, block_with_constants, element)) if (!tryParseAtomFromAST(node, context, block_with_constants, element))
{ {
element.function = RPNElement::FUNCTION_UNKNOWN; element.function = RPNElement::FUNCTION_UNKNOWN;
} }
@ -680,7 +754,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
} }
bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out) bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
{ {
/** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key, /** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key,
* or itself, wrapped in a chain of possibly-monotonic functions, * or itself, wrapped in a chain of possibly-monotonic functions,
@ -768,7 +842,9 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
func_name = "lessOrEquals"; func_name = "lessOrEquals";
else if (func_name == "lessOrEquals") else if (func_name == "lessOrEquals")
func_name = "greaterOrEquals"; func_name = "greaterOrEquals";
else if (func_name == "in" || func_name == "notIn" || func_name == "like") else if (func_name == "in" || func_name == "notIn" ||
func_name == "like" || func_name == "notLike" ||
func_name == "startsWith")
{ {
/// "const IN data_column" doesn't make sense (unlike "data_column IN const") /// "const IN data_column" doesn't make sense (unlike "data_column IN const")
return false; return false;
@ -809,7 +885,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
return false; return false;
} }
bool KeyCondition::operatorFromAST(const ASTFunction * func, RPNElement & out) bool KeyCondition::tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out)
{ {
/// Functions AND, OR, NOT. /// Functions AND, OR, NOT.
/** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses /** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses

View File

@ -369,8 +369,8 @@ private:
BoolMask initial_mask) const; BoolMask initial_mask) const;
void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants); void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants);
bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out); bool tryParseAtomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
bool operatorFromAST(const ASTFunction * func, RPNElement & out); bool tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out);
/** Is node the key column /** Is node the key column
* or expression in which column of key is wrapped by chain of functions, * or expression in which column of key is wrapped by chain of functions,

View File

@ -0,0 +1,33 @@
TP1
7.51
7.42
7.41
7.42
7.41
7.42
7.41
7.42
7.41
7.51
TP2
7.42
7.41
7.42
7.51
7.42
7.41
7.51
7.51
TP3
7.42
7.41
7.51
TP4
7.42
7.41
7.42
7.42
7.41
TP5
7.41
7.51

View File

@ -0,0 +1,33 @@
SET max_threads = 1;
CREATE TABLE IF NOT EXISTS functional_index_mergetree (x Float64) ENGINE = MergeTree ORDER BY round(x);
INSERT INTO functional_index_mergetree VALUES (7.42)(7.41)(7.51);
SELECT 'TP1';
SELECT * FROM functional_index_mergetree WHERE x > 7.42;
SELECT * FROM functional_index_mergetree WHERE x < 7.49;
SELECT * FROM functional_index_mergetree WHERE x < 7.5;
SELECT * FROM functional_index_mergetree WHERE NOT (NOT x < 7.49);
SELECT * FROM functional_index_mergetree WHERE NOT (NOT x < 7.5);
SELECT * FROM functional_index_mergetree WHERE NOT (NOT x > 7.42);
SELECT 'TP2';
SELECT * FROM functional_index_mergetree WHERE NOT x > 7.49;
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.42;
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.41;
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.5;
SELECT 'TP3';
SELECT * FROM functional_index_mergetree WHERE x > 7.41 AND x < 7.51;
SELECT * FROM functional_index_mergetree WHERE NOT (x > 7.41 AND x < 7.51);
SELECT 'TP4';
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.41 AND NOT x > 7.49;
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.42 AND NOT x > 7.42;
SELECT * FROM functional_index_mergetree WHERE (NOT x < 7.4) AND (NOT x > 7.49);
SELECT 'TP5';
SELECT * FROM functional_index_mergetree WHERE NOT or(NOT x, toUInt64(x) AND NOT floor(x) > 6, x >= 7.42 AND round(x) <= 7);
DROP TABLE functional_index_mergetree;