Merge pull request #54068 from ClickHouse/vdimir/join_on_nullsafe_analyzer

Analyzer support 'is not distinct from' in join on section
This commit is contained in:
vdimir 2023-11-07 10:39:56 +01:00 committed by GitHub
commit 0437b5777a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 82 additions and 14 deletions

View File

@ -1146,12 +1146,13 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
const auto & join_clause_right_key_nodes = join_clause.getRightKeyNodes();
size_t join_clause_key_nodes_size = join_clause_left_key_nodes.size();
assert(join_clause_key_nodes_size == join_clause_right_key_nodes.size());
chassert(join_clause_key_nodes_size == join_clause_right_key_nodes.size());
for (size_t i = 0; i < join_clause_key_nodes_size; ++i)
{
table_join_clause.key_names_left.push_back(join_clause_left_key_nodes[i]->result_name);
table_join_clause.key_names_right.push_back(join_clause_right_key_nodes[i]->result_name);
table_join_clause.addKey(join_clause_left_key_nodes[i]->result_name,
join_clause_right_key_nodes[i]->result_name,
join_clause.isNullsafeCompareKey(i));
}
const auto & join_clause_get_left_filter_condition_nodes = join_clause.getLeftFilterConditionNodes();

View File

@ -191,7 +191,7 @@ void buildJoinClause(ActionsDAGPtr join_expression_dag,
auto asof_inequality = getASOFJoinInequality(function_name);
bool is_asof_join_inequality = join_node.getStrictness() == JoinStrictness::Asof && asof_inequality != ASOFJoinInequality::None;
if (function_name == "equals" || is_asof_join_inequality)
if (function_name == "equals" || function_name == "isNotDistinctFrom" || is_asof_join_inequality)
{
const auto * left_child = join_expressions_actions_node->children.at(0);
const auto * right_child = join_expressions_actions_node->children.at(1);
@ -253,7 +253,8 @@ void buildJoinClause(ActionsDAGPtr join_expression_dag,
}
else
{
join_clause.addKey(left_key, right_key);
bool null_safe_comparison = function_name == "isNotDistinctFrom";
join_clause.addKey(left_key, right_key, null_safe_comparison);
}
}
else
@ -474,6 +475,24 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName &
right_key_node = &join_expression_actions->addCast(*right_key_node, common_type, {});
}
if (join_clause.isNullsafeCompareKey(i) && left_key_node->result_type->isNullable() && right_key_node->result_type->isNullable())
{
/**
* In case of null-safe comparison (a IS NOT DISTINCT FROM b),
* we need to wrap keys with a non-nullable type.
* The type `tuple` can be used for this purpose,
* because the value tuple(NULL) is not NULL itself (moreover, it has type Tuple(Nullable(T)), which is not Nullable).
* Thus, join algorithm will match keys with values tuple(NULL).
* Example:
* SELECT * FROM t1 JOIN t2 ON t1.a <=> t2.b
* This will be semantically transformed to:
* SELECT * FROM t1 JOIN t2 ON tuple(t1.a) == tuple(t2.b)
*/
auto wrap_nullsafe_function = FunctionFactory::instance().get("tuple", planner_context->getQueryContext());
left_key_node = &join_expression_actions->addFunction(wrap_nullsafe_function, {left_key_node}, {});
right_key_node = &join_expression_actions->addFunction(wrap_nullsafe_function, {right_key_node}, {});
}
join_expression_actions->addOrReplaceInOutputs(*left_key_node);
join_expression_actions->addOrReplaceInOutputs(*right_key_node);

View File

@ -53,10 +53,12 @@ class JoinClause
{
public:
/// Add keys
void addKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node)
void addKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node, bool null_safe_comparison = false)
{
left_key_nodes.emplace_back(left_key_node);
right_key_nodes.emplace_back(right_key_node);
if (null_safe_comparison)
nullsafe_compare_key_indexes.emplace(left_key_nodes.size() - 1);
}
void addASOFKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node, ASOFJoinInequality asof_inequality)
@ -97,6 +99,11 @@ public:
return right_key_nodes;
}
/// Tells whether the key pair stored at position `idx` is listed in
/// `nullsafe_compare_key_indexes`, i.e. was flagged for null-safe comparison.
bool isNullsafeCompareKey(size_t idx) const
{
    const auto position = nullsafe_compare_key_indexes.find(idx);
    return position != nullsafe_compare_key_indexes.end();
}
/// Returns true if JOIN clause has ASOF conditions, false otherwise
bool hasASOF() const
{
@ -147,6 +154,8 @@ private:
ActionsDAG::NodeRawConstPtrs left_filter_condition_nodes;
ActionsDAG::NodeRawConstPtrs right_filter_condition_nodes;
std::unordered_set<size_t> nullsafe_compare_key_indexes;
};
using JoinClauses = std::vector<JoinClause>;

View File

@ -66,4 +66,3 @@
01940_custom_tld_sharding_key
02815_range_dict_no_direct_join
02845_threads_count_in_distributed_queries
02861_join_on_nullsafe_compare

View File

@ -647,6 +647,37 @@ join_algorithm = default, join_use_nulls = 0, t1 JOIN t4
19 19 19 19
\N 20 \N 0
--
\N 0 2 2
\N 0 \N 4
\N 0 6 6
\N 0 \N 8
\N 0 10 10
\N 0 \N 12
\N 0 14 14
\N 0 \N 16
\N 0 18 18
\N 0 \N 20
1 1 1 1
\N 2 \N 0
3 3 3 3
\N 4 \N 0
5 5 5 5
\N 6 \N 0
7 7 7 7
\N 8 \N 0
9 9 9 9
\N 10 \N 0
11 11 11 11
\N 12 \N 0
13 13 13 13
\N 14 \N 0
15 15 15 15
\N 16 \N 0
17 17 17 17
\N 18 \N 0
19 19 19 19
\N 20 \N 0
--
1 42 420 1 1 43 430 1
\N 42 420 2 \N 43 430 4
\N 42 420 2 \N 43 430 8

View File

@ -30,28 +30,28 @@ SELECT 'join_algorithm = {{ join_algorithm }}, join_use_nulls = {{ join_use_null
SELECT '--';
SELECT {{ t1 }}.a, {{ t1 }}.val, {{ t2 }}.a, {{ t2 }}.val FROM {{ t1 }} FULL JOIN {{ t2 }}
ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a)
ON {{ t1 }}.a <=> {{ t2 }}.a
ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST
;
SELECT '--';
SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }}
ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) AND isNotDistinctFrom({{ t1 }}.b, {{ t2 }}.b)
ON {{ t1 }}.a <=> {{ t2 }}.a AND {{ t1 }}.b <=> {{ t2 }}.b
ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST
;
SELECT '--';
SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }}
ON {{ t1 }}.a == {{ t2 }}.a AND isNotDistinctFrom({{ t1 }}.b, {{ t2 }}.b)
ON {{ t1 }}.a == {{ t2 }}.a AND {{ t1 }}.b <=> {{ t2 }}.b
ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST
;
SELECT '--';
SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }}
ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) AND {{ t1 }}.b == {{ t2 }}.b
ON {{ t1 }}.a <=> {{ t2 }}.a AND {{ t1 }}.b == {{ t2 }}.b
ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST
;
@ -62,7 +62,14 @@ SELECT '--';
SET join_use_nulls = 0;
SET join_algorithm = 'hash';
SELECT t1.a, t1.val, t2.a, t2.val FROM t1 FULL JOIN t2
ON isNotDistinctFrom(t1.a, t2.a) AND t1.b < 2 OR t1.a == t2.a
ON t1.a <=> t2.a AND t1.b < 2 OR t1.a == t2.a
ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST
;
SELECT '--';
SELECT t1.a, t1.val, t2.a, t2.val FROM t1 FULL JOIN t2
ON t1.a IS NOT DISTINCT FROM t2.a AND t1.b < 2 OR t1.a == t2.a
ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST
;
@ -76,7 +83,7 @@ SET join_use_nulls = 1;
SELECT *
FROM (SELECT a, 42 as `__wrapNullsafe(a)`, 420 as `tuple(a)`, val FROM t1) t1
JOIN (SELECT a, 43 as `__wrapNullsafe(t2.a)`, 430 as `tuple(t2.a)`, val FROM t2) t2
ON isNotDistinctFrom(t1.a, t2.a)
ON t1.a <=> t2.a
ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST
LIMIT 10;
@ -85,7 +92,7 @@ SELECT '--';
SELECT a, 42 as `__wrapNullsafe(a)`, 420 as `tuple(a)`, val, t2.a, 43 as `__wrapNullsafe(t2.a)`, 430 as `tuple(t2.a)`, t2.val
FROM (SELECT a, val, 111 as `__wrapNullsafe(a)_0` FROM t1) t1
JOIN (SELECT a, val, 111 as `__wrapNullsafe(t2.a)_0` FROM t2) t2
ON isNotDistinctFrom(t1.a, t2.a)
ON t1.a <=> t2.a
ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST
LIMIT 10;
@ -99,3 +106,5 @@ SELECT * FROM t1 JOIN t2 ON isNotDistinctFrom(t1.a, t2.a, t2.b); -- { serverErro
SELECT isNotDistinctFrom(a) from t1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
SELECT isNotDistinctFrom(a, b) from t1; -- { serverError NOT_IMPLEMENTED }
SELECT a <=> b from t1; -- { serverError NOT_IMPLEMENTED }
SELECT a IS NOT DISTINCT FROM b from t1; -- { serverError NOT_IMPLEMENTED }