Merge pull request #66725 from ClickHouse/vdimir/join_on_null_fix_followup

Support one more case in JOIN ON ... IS NULL
This commit is contained in:
vdimir 2024-07-22 10:09:57 +00:00 committed by GitHub
commit 77c253634a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 47 additions and 4 deletions

View File

@ -68,6 +68,41 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes)
return nullptr;
}
/// Checks if the node is combination of isNull and notEquals functions of two the same arguments
bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs)
{
QueryTreeNodePtrWithHashSet all_arguments;
for (const auto & node : nodes)
{
const auto * func_node = node->as<FunctionNode>();
if (!func_node)
return false;
const auto & arguments = func_node->getArguments().getNodes();
if (func_node->getFunctionName() == "isNull" && arguments.size() == 1)
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2)
{
if (arguments[0]->isEqual(*arguments[1]))
return false;
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0]));
all_arguments.insert(QueryTreeNodePtrWithHash(arguments[1]));
}
else
return false;
if (all_arguments.size() > 2)
return false;
}
if (all_arguments.size() != 2)
return false;
lhs = all_arguments.begin()->node;
rhs = std::next(all_arguments.begin())->node;
return true;
}
bool isBooleanConstant(const QueryTreeNodePtr & node, bool expected_value)
{
const auto * constant_node = node->as<ConstantNode>();
@ -213,11 +248,14 @@ private:
else if (func_name == "and")
{
const auto & and_arguments = argument_function->getArguments().getNodes();
bool all_are_is_null = and_arguments.size() == 2 && isNodeFunction(and_arguments[0], "isNull") && isNodeFunction(and_arguments[1], "isNull");
if (all_are_is_null)
QueryTreeNodePtr is_null_lhs_arg;
QueryTreeNodePtr is_null_rhs_arg;
if (matchIsNullOfTwoArgs(and_arguments, is_null_lhs_arg, is_null_rhs_arg))
{
is_null_argument_to_indices[getFunctionArgument(and_arguments.front(), 0)].push_back(or_operands.size() - 1);
is_null_argument_to_indices[getFunctionArgument(and_arguments.back(), 0)].push_back(or_operands.size() - 1);
is_null_argument_to_indices[is_null_lhs_arg].push_back(or_operands.size() - 1);
is_null_argument_to_indices[is_null_rhs_arg].push_back(or_operands.size() - 1);
continue;
}
/// Expression `a = b AND (a IS NOT NULL) AND true AND (b IS NOT NULL)` we can be replaced with `a = b`

View File

@ -35,6 +35,10 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N
2 2 2 2
3 3 3 33
\N \N \N \N
SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
2 2 2 2
3 3 3 33
\N \N \N \N
-- aliases defined in the join condition are valid
-- FIXME(@vdimir) broken query formatting for the following queries:
-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;

View File

@ -34,6 +34,7 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) A
SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST;
SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
-- aliases defined in the join condition are valid
-- FIXME(@vdimir) broken query formatting for the following queries: