Merge pull request #68049 from ClickHouse/vdimir/fix_issue_67915

Fix join on nulls optimization
This commit is contained in:
Vladimir Cherkasov 2024-10-22 13:18:49 +00:00 committed by GitHub
commit 5b9bbace65
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 25 additions and 0 deletions

View File

@ -184,6 +184,26 @@ public:
, join_node(join_node_)
{}
bool needChildVisit(const QueryTreeNodePtr & parent, const QueryTreeNodePtr &)
{
/** Optimization can change the value of some expression from NULL to FALSE.
* For example:
* when `a` is `NULL`, the expression `a = b AND a IS NOT NULL` returns `NULL`
* and it will be optimized to `a = b`, which returns `FALSE`.
* This is valid for JOIN ON condition and for the functions `AND`/`OR` inside it.
* (When we replace `AND`/`OR` operands from `NULL` to `FALSE`, the result value can also change only from `NULL` to `FALSE`)
* However, in the general case, the result can be wrong.
* For example, for NOT: `NOT NULL` is `NULL`, but `NOT FALSE` is `TRUE`.
* Therefore, optimize only top-level expression or expressions inside `AND`/`OR`.
*/
if (const auto * function_node = parent->as<FunctionNode>())
{
const auto & func_name = function_node->getFunctionName();
return func_name == "or" || func_name == "and";
}
return parent->getNodeType() == QueryTreeNodeType::LIST;
}
void enterImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();

View File

@ -64,3 +64,4 @@ SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL)
0
2
2
1

View File

@ -67,6 +67,10 @@ SELECT count() FROM ( EXPLAIN QUERY TREE
SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL)
) WHERE explain like '%CONSTANT%' OR explain ilike '%is%null%';
-- Counts the EXPLAIN QUERY TREE lines that mention `function_name: isNull` (case-insensitive match)
-- for a JOIN ON condition where `IS NULL` is nested under `NOT (... OR ...)`.
-- NOTE(review): the expected count is presumably the `1` added to the reference file, i.e. the
-- isNull call under NOT must stay in the tree rather than be optimized away - confirm.
SELECT count() FROM ( EXPLAIN QUERY TREE
SELECT * FROM t1 JOIN t2 ON t1.x = t2.x AND NOT (t1.x = 1 OR t1.x IS NULL)
) WHERE explain ilike '%function_name: isNull%';
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t1n;