mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
In KeyCondition: Fixed execution of inversed predicates for non-strictly monotinic functional index (#9223)
* Tests for functional index * Fixed execution of inversed predicates in functional index When non-strictly monotonic functional index is used inverted predicated may be executed incorrectly, which leads to multiple problems: #8821, #9034
This commit is contained in:
parent
17383be8ca
commit
5a67c02a5d
@ -264,6 +264,78 @@ const KeyCondition::AtomMap KeyCondition::atom_map
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static const std::map<std::string, std::string> inverse_relations = {
|
||||||
|
{"equals", "notEquals"},
|
||||||
|
{"notEquals", "equals"},
|
||||||
|
{"less", "greaterOrEquals"},
|
||||||
|
{"greaterOrEquals", "less"},
|
||||||
|
{"greater", "lessOrEquals"},
|
||||||
|
{"lessOrEquals", "greater"},
|
||||||
|
{"in", "notIn"},
|
||||||
|
{"notIn", "in"},
|
||||||
|
{"like", "notLike"},
|
||||||
|
{"notLike", "like"},
|
||||||
|
{"empty", "notEmpty"},
|
||||||
|
{"notEmpty", "empty"},
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
bool isLogicalOperator(const String & func_name)
|
||||||
|
{
|
||||||
|
return (func_name == "and" || func_name == "or" || func_name == "not" || func_name == "indexHint");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The node can be one of:
|
||||||
|
/// - Logical operator (AND, OR, NOT and indexHint() - logical NOOP)
|
||||||
|
/// - An "atom" (relational operator, constant, expression)
|
||||||
|
/// - A logical constant expression
|
||||||
|
/// - Any other function
|
||||||
|
ASTPtr cloneASTWithInversionPushDown(const ASTPtr node, const bool need_inversion = false)
|
||||||
|
{
|
||||||
|
const ASTFunction * func = node->as<ASTFunction>();
|
||||||
|
|
||||||
|
if (func && isLogicalOperator(func->name))
|
||||||
|
{
|
||||||
|
if (func->name == "not")
|
||||||
|
{
|
||||||
|
return cloneASTWithInversionPushDown(func->arguments->children.front(), !need_inversion);
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto result_node = makeASTFunction(func->name);
|
||||||
|
|
||||||
|
/// indexHint() is a special case - logical NOOP function
|
||||||
|
if (result_node->name != "indexHint" && need_inversion)
|
||||||
|
{
|
||||||
|
result_node->name = (result_node->name == "and") ? "or" : "and";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (func->arguments)
|
||||||
|
{
|
||||||
|
for (const auto & child : func->arguments->children)
|
||||||
|
{
|
||||||
|
result_node->arguments->children.push_back(cloneASTWithInversionPushDown(child, need_inversion));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto cloned_node = node->clone();
|
||||||
|
|
||||||
|
if (func && inverse_relations.find(func->name) != inverse_relations.cend())
|
||||||
|
{
|
||||||
|
if (need_inversion)
|
||||||
|
{
|
||||||
|
cloned_node->as<ASTFunction>()->name = inverse_relations.at(func->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
return cloned_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
return need_inversion ? makeASTFunction("not", cloned_node) : cloned_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
inline bool Range::equals(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateEquals(), lhs, rhs); }
|
inline bool Range::equals(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateEquals(), lhs, rhs); }
|
||||||
inline bool Range::less(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs); }
|
inline bool Range::less(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs); }
|
||||||
|
|
||||||
@ -345,21 +417,23 @@ KeyCondition::KeyCondition(
|
|||||||
*/
|
*/
|
||||||
Block block_with_constants = getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context);
|
Block block_with_constants = getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context);
|
||||||
|
|
||||||
/// Trasform WHERE section to Reverse Polish notation
|
const ASTSelectQuery & select = query_info.query->as<ASTSelectQuery &>();
|
||||||
const auto & select = query_info.query->as<ASTSelectQuery &>();
|
if (select.where() || select.prewhere())
|
||||||
if (select.where())
|
|
||||||
{
|
{
|
||||||
traverseAST(select.where(), context, block_with_constants);
|
ASTPtr filter_query;
|
||||||
|
if (select.where() && select.prewhere())
|
||||||
|
filter_query = makeASTFunction("and", select.where(), select.prewhere());
|
||||||
|
else
|
||||||
|
filter_query = select.where() ? select.where() : select.prewhere();
|
||||||
|
|
||||||
if (select.prewhere())
|
/** When non-strictly monotonic functions are employed in functional index (e.g. ORDER BY toStartOfHour(dateTime)),
|
||||||
{
|
* the use of NOT operator in predicate will result in the indexing algorithm leave out some data.
|
||||||
traverseAST(select.prewhere(), context, block_with_constants);
|
* This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict
|
||||||
rpn.emplace_back(RPNElement::FUNCTION_AND);
|
* when parsing the AST into internal RPN representation.
|
||||||
}
|
* To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's
|
||||||
}
|
* are pushed down and applied (when possible) to leaf nodes.
|
||||||
else if (select.prewhere())
|
*/
|
||||||
{
|
traverseAST(cloneASTWithInversionPushDown(filter_query), context, block_with_constants);
|
||||||
traverseAST(select.prewhere(), context, block_with_constants);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -432,9 +506,9 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo
|
|||||||
{
|
{
|
||||||
RPNElement element;
|
RPNElement element;
|
||||||
|
|
||||||
if (auto * func = node->as<ASTFunction>())
|
if (const auto * func = node->as<ASTFunction>())
|
||||||
{
|
{
|
||||||
if (operatorFromAST(func, element))
|
if (tryParseLogicalOperatorFromAST(func, element))
|
||||||
{
|
{
|
||||||
auto & args = func->arguments->children;
|
auto & args = func->arguments->children;
|
||||||
for (size_t i = 0, size = args.size(); i < size; ++i)
|
for (size_t i = 0, size = args.size(); i < size; ++i)
|
||||||
@ -452,7 +526,7 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!atomFromAST(node, context, block_with_constants, element))
|
if (!tryParseAtomFromAST(node, context, block_with_constants, element))
|
||||||
{
|
{
|
||||||
element.function = RPNElement::FUNCTION_UNKNOWN;
|
element.function = RPNElement::FUNCTION_UNKNOWN;
|
||||||
}
|
}
|
||||||
@ -680,7 +754,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
|
bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
|
||||||
{
|
{
|
||||||
/** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key,
|
/** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key,
|
||||||
* or itself, wrapped in a chain of possibly-monotonic functions,
|
* or itself, wrapped in a chain of possibly-monotonic functions,
|
||||||
@ -768,7 +842,9 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
|
|||||||
func_name = "lessOrEquals";
|
func_name = "lessOrEquals";
|
||||||
else if (func_name == "lessOrEquals")
|
else if (func_name == "lessOrEquals")
|
||||||
func_name = "greaterOrEquals";
|
func_name = "greaterOrEquals";
|
||||||
else if (func_name == "in" || func_name == "notIn" || func_name == "like")
|
else if (func_name == "in" || func_name == "notIn" ||
|
||||||
|
func_name == "like" || func_name == "notLike" ||
|
||||||
|
func_name == "startsWith")
|
||||||
{
|
{
|
||||||
/// "const IN data_column" doesn't make sense (unlike "data_column IN const")
|
/// "const IN data_column" doesn't make sense (unlike "data_column IN const")
|
||||||
return false;
|
return false;
|
||||||
@ -809,7 +885,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeyCondition::operatorFromAST(const ASTFunction * func, RPNElement & out)
|
bool KeyCondition::tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out)
|
||||||
{
|
{
|
||||||
/// Functions AND, OR, NOT.
|
/// Functions AND, OR, NOT.
|
||||||
/** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
|
/** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
|
||||||
|
@ -369,8 +369,8 @@ private:
|
|||||||
BoolMask initial_mask) const;
|
BoolMask initial_mask) const;
|
||||||
|
|
||||||
void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants);
|
void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants);
|
||||||
bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
|
bool tryParseAtomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
|
||||||
bool operatorFromAST(const ASTFunction * func, RPNElement & out);
|
bool tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out);
|
||||||
|
|
||||||
/** Is node the key column
|
/** Is node the key column
|
||||||
* or expression in which column of key is wrapped by chain of functions,
|
* or expression in which column of key is wrapped by chain of functions,
|
||||||
|
@ -0,0 +1,33 @@
|
|||||||
|
TP1
|
||||||
|
7.51
|
||||||
|
7.42
|
||||||
|
7.41
|
||||||
|
7.42
|
||||||
|
7.41
|
||||||
|
7.42
|
||||||
|
7.41
|
||||||
|
7.42
|
||||||
|
7.41
|
||||||
|
7.51
|
||||||
|
TP2
|
||||||
|
7.42
|
||||||
|
7.41
|
||||||
|
7.42
|
||||||
|
7.51
|
||||||
|
7.42
|
||||||
|
7.41
|
||||||
|
7.51
|
||||||
|
7.51
|
||||||
|
TP3
|
||||||
|
7.42
|
||||||
|
7.41
|
||||||
|
7.51
|
||||||
|
TP4
|
||||||
|
7.42
|
||||||
|
7.41
|
||||||
|
7.42
|
||||||
|
7.42
|
||||||
|
7.41
|
||||||
|
TP5
|
||||||
|
7.41
|
||||||
|
7.51
|
@ -0,0 +1,33 @@
|
|||||||
|
SET max_threads = 1;
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS functional_index_mergetree (x Float64) ENGINE = MergeTree ORDER BY round(x);
|
||||||
|
INSERT INTO functional_index_mergetree VALUES (7.42)(7.41)(7.51);
|
||||||
|
|
||||||
|
SELECT 'TP1';
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE x > 7.42;
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE x < 7.49;
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE x < 7.5;
|
||||||
|
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT (NOT x < 7.49);
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT (NOT x < 7.5);
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT (NOT x > 7.42);
|
||||||
|
|
||||||
|
SELECT 'TP2';
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT x > 7.49;
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.42;
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.41;
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.5;
|
||||||
|
|
||||||
|
SELECT 'TP3';
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE x > 7.41 AND x < 7.51;
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT (x > 7.41 AND x < 7.51);
|
||||||
|
|
||||||
|
SELECT 'TP4';
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.41 AND NOT x > 7.49;
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.42 AND NOT x > 7.42;
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE (NOT x < 7.4) AND (NOT x > 7.49);
|
||||||
|
|
||||||
|
SELECT 'TP5';
|
||||||
|
SELECT * FROM functional_index_mergetree WHERE NOT or(NOT x, toUInt64(x) AND NOT floor(x) > 6, x >= 7.42 AND round(x) <= 7);
|
||||||
|
|
||||||
|
DROP TABLE functional_index_mergetree;
|
Loading…
Reference in New Issue
Block a user