mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-02 12:32:04 +00:00
Add an optimization that removes redundant equality
checks on boolean functions. This fixes a bug in which the primary index is not used for queries like SELECT * FROM <table> WHERE <pk> in (<n>) = 1
This commit is contained in:
parent
f36ae13f97
commit
9d4f1d890e
@ -19,6 +19,19 @@ namespace ErrorCodes
|
|||||||
extern const int LOGICAL_ERROR;
|
extern const int LOGICAL_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using namespace std::literals;
|
||||||
|
static constexpr std::array boolean_functions{
|
||||||
|
"equals"sv, "notEquals"sv, "less"sv, "greaterOrEquals"sv, "greater"sv, "lessOrEquals"sv, "in"sv, "notIn"sv,
|
||||||
|
"globalIn"sv, "globalNotIn"sv, "nullIn"sv, "notNullIn"sv, "globalNullIn"sv, "globalNullNotIn"sv, "isNull"sv, "isNotNull"sv,
|
||||||
|
"like"sv, "notLike"sv, "ilike"sv, "notILike"sv, "empty"sv, "notEmpty"sv, "not"sv, "and"sv,
|
||||||
|
"or"sv};
|
||||||
|
|
||||||
|
static bool isBooleanFunction(const String & func_name)
|
||||||
|
{
|
||||||
|
return std::any_of(
|
||||||
|
boolean_functions.begin(), boolean_functions.end(), [&](const auto boolean_func) { return func_name == boolean_func; });
|
||||||
|
}
|
||||||
|
|
||||||
/// Visitor that optimizes logical expressions _only_ in JOIN ON section
|
/// Visitor that optimizes logical expressions _only_ in JOIN ON section
|
||||||
class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext<JoinOnLogicalExpressionOptimizerVisitor>
|
class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithContext<JoinOnLogicalExpressionOptimizerVisitor>
|
||||||
{
|
{
|
||||||
@ -253,6 +266,12 @@ public:
|
|||||||
tryOptimizeAndEqualsNotEqualsChain(node);
|
tryOptimizeAndEqualsNotEqualsChain(node);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (function_node->getFunctionName() == "equals")
|
||||||
|
{
|
||||||
|
tryOptimizeOutRedundantEquals(node);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -552,6 +571,63 @@ private:
|
|||||||
function_node.getArguments().getNodes() = std::move(or_operands);
|
function_node.getArguments().getNodes() = std::move(or_operands);
|
||||||
function_node.resolveAsFunction(or_function_resolver);
|
function_node.resolveAsFunction(or_function_resolver);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void tryOptimizeOutRedundantEquals(QueryTreeNodePtr & node)
|
||||||
|
{
|
||||||
|
auto & function_node = node->as<FunctionNode &>();
|
||||||
|
assert(function_node.getFunctionName() == "equals");
|
||||||
|
|
||||||
|
bool lhs_const;
|
||||||
|
bool maybe_invert;
|
||||||
|
|
||||||
|
const ConstantNode * constant;
|
||||||
|
const FunctionNode * child_function;
|
||||||
|
|
||||||
|
const auto function_arguments = function_node.getArguments().getNodes();
|
||||||
|
if (function_arguments.size() != 2)
|
||||||
|
return;
|
||||||
|
|
||||||
|
const auto & lhs = function_arguments[0];
|
||||||
|
const auto & rhs = function_arguments[1];
|
||||||
|
|
||||||
|
if ((constant = lhs->as<ConstantNode>()))
|
||||||
|
lhs_const = true;
|
||||||
|
else if ((constant = rhs->as<ConstantNode>()))
|
||||||
|
lhs_const = false;
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
UInt64 val;
|
||||||
|
if (!constant->getValue().tryGet<UInt64>(val))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (val == 1)
|
||||||
|
maybe_invert = false;
|
||||||
|
else if (val == 0)
|
||||||
|
maybe_invert = true;
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (lhs_const)
|
||||||
|
child_function = rhs->as<FunctionNode>();
|
||||||
|
else
|
||||||
|
child_function = lhs->as<FunctionNode>();
|
||||||
|
|
||||||
|
if (!child_function || !isBooleanFunction(child_function->getFunctionName()))
|
||||||
|
return;
|
||||||
|
if (maybe_invert)
|
||||||
|
{
|
||||||
|
auto not_resolver = FunctionFactory::instance().get("not", getContext());
|
||||||
|
const auto not_node = std::make_shared<FunctionNode>("not");
|
||||||
|
auto & arguments = not_node->getArguments().getNodes();
|
||||||
|
arguments.reserve(1);
|
||||||
|
arguments.push_back(lhs_const ? rhs : lhs);
|
||||||
|
not_node->resolveAsFunction(not_resolver->build(not_node->getArgumentColumns()));
|
||||||
|
node = not_node;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
node = lhs_const ? rhs : lhs;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
|
void LogicalExpressionOptimizerPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context)
|
||||||
|
@ -96,6 +96,18 @@ namespace DB
|
|||||||
*
|
*
|
||||||
* SELECT * FROM t1 JOIN t2 ON a <=> b
|
* SELECT * FROM t1 JOIN t2 ON a <=> b
|
||||||
* -------------------------------
|
* -------------------------------
|
||||||
|
*
|
||||||
|
* 7. Remove redundant equality checks on boolean functions.
|
||||||
|
* - these requndant checks cause the primary index to not be used when if the query involves any primary key columns
|
||||||
|
* -------------------------------
|
||||||
|
* SELECT * FROM t1 WHERE a IN (n) = 1
|
||||||
|
* SELECT * FROM t1 WHERE a IN (n) = 0
|
||||||
|
*
|
||||||
|
* will be transformed into
|
||||||
|
*
|
||||||
|
* SELECT * FROM t1 WHERE a IN (n)
|
||||||
|
* SELECT * FROM t1 WHERE NOT a IN (n)
|
||||||
|
* -------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class LogicalExpressionOptimizerPass final : public IQueryTreePass
|
class LogicalExpressionOptimizerPass final : public IQueryTreePass
|
||||||
|
23
tests/queries/0_stateless/03032_redundant_equals.reference
Normal file
23
tests/queries/0_stateless/03032_redundant_equals.reference
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
100
|
||||||
|
100
|
||||||
|
100
|
||||||
|
100
|
||||||
|
100
|
||||||
|
100
|
||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
||||||
|
1
|
||||||
|
100
|
||||||
|
101
|
||||||
|
100
|
||||||
|
101
|
||||||
|
100
|
||||||
|
101
|
||||||
|
100
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
83
tests/queries/0_stateless/03032_redundant_equals.sql
Normal file
83
tests/queries/0_stateless/03032_redundant_equals.sql
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
DROP TABLE IF EXISTS test_table;
|
||||||
|
|
||||||
|
CREATE TABLE test_table
|
||||||
|
(
|
||||||
|
k UInt64,
|
||||||
|
)
|
||||||
|
ENGINE = MergeTree
|
||||||
|
ORDER BY k;
|
||||||
|
|
||||||
|
INSERT INTO test_table SELECT number FROM numbers(10000000);
|
||||||
|
|
||||||
|
SELECT * FROM test_table WHERE k in (100) = 1;
|
||||||
|
SELECT * FROM test_table WHERE k = (100) = 1;
|
||||||
|
SELECT * FROM test_table WHERE k not in (100) = 0;
|
||||||
|
SELECT * FROM test_table WHERE k != (100) = 0;
|
||||||
|
SELECT * FROM test_table WHERE 1 = (k = 100);
|
||||||
|
SELECT * FROM test_table WHERE 0 = (k not in (100));
|
||||||
|
SELECT * FROM test_table WHERE k < 1 = 1;
|
||||||
|
SELECT * FROM test_table WHERE k >= 1 = 0;
|
||||||
|
SELECT * FROM test_table WHERE k > 1 = 0;
|
||||||
|
SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1;
|
||||||
|
SELECT * FROM test_table WHERE (NOT ((k not in (100) = 0) OR (k in (100) = 1))) = 0;
|
||||||
|
SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1;
|
||||||
|
SELECT * FROM test_table WHERE ((k not in (101) = 0) OR (k in (100) = 1)) = 1;
|
||||||
|
SELECT * FROM test_table WHERE ((k not in (99) = 1) AND (k in (100) = 1)) = 1;
|
||||||
|
|
||||||
|
SELECT count()
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes=1
|
||||||
|
SELECT * FROM test_table WHERE k in (100) = 1
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: 1/%';
|
||||||
|
|
||||||
|
SELECT count()
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes=1
|
||||||
|
SELECT * FROM test_table WHERE k >= 1 = 0
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: 1/%';
|
||||||
|
|
||||||
|
SELECT count()
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes=1
|
||||||
|
SELECT * FROM test_table WHERE k not in (100) = 0
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: 1/%';
|
||||||
|
|
||||||
|
SELECT count()
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes=1
|
||||||
|
SELECT * FROM test_table WHERE k > 1 = 0
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: 1/%';
|
||||||
|
|
||||||
|
SELECT count()
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes=1
|
||||||
|
SELECT * FROM test_table WHERE (NOT ((k not in (100) = 0) OR (k in (100) = 1))) = 0
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: 1/%';
|
||||||
|
|
||||||
|
|
||||||
|
SELECT count()
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes=1
|
||||||
|
SELECT * FROM test_table WHERE (NOT ((k in (101) = 0) OR (k in (100) = 1))) = 1
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: 1/%';
|
||||||
|
|
||||||
|
|
||||||
|
DROP TABLE test_table;
|
Loading…
Reference in New Issue
Block a user