mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge pull request #54068 from ClickHouse/vdimir/join_on_nullsafe_analyzer
Analyzer support 'is not distinct from' in join on section
This commit is contained in:
commit
0437b5777a
@ -1146,12 +1146,13 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
|
||||
const auto & join_clause_right_key_nodes = join_clause.getRightKeyNodes();
|
||||
|
||||
size_t join_clause_key_nodes_size = join_clause_left_key_nodes.size();
|
||||
assert(join_clause_key_nodes_size == join_clause_right_key_nodes.size());
|
||||
chassert(join_clause_key_nodes_size == join_clause_right_key_nodes.size());
|
||||
|
||||
for (size_t i = 0; i < join_clause_key_nodes_size; ++i)
|
||||
{
|
||||
table_join_clause.key_names_left.push_back(join_clause_left_key_nodes[i]->result_name);
|
||||
table_join_clause.key_names_right.push_back(join_clause_right_key_nodes[i]->result_name);
|
||||
table_join_clause.addKey(join_clause_left_key_nodes[i]->result_name,
|
||||
join_clause_right_key_nodes[i]->result_name,
|
||||
join_clause.isNullsafeCompareKey(i));
|
||||
}
|
||||
|
||||
const auto & join_clause_get_left_filter_condition_nodes = join_clause.getLeftFilterConditionNodes();
|
||||
|
@ -191,7 +191,7 @@ void buildJoinClause(ActionsDAGPtr join_expression_dag,
|
||||
auto asof_inequality = getASOFJoinInequality(function_name);
|
||||
bool is_asof_join_inequality = join_node.getStrictness() == JoinStrictness::Asof && asof_inequality != ASOFJoinInequality::None;
|
||||
|
||||
if (function_name == "equals" || is_asof_join_inequality)
|
||||
if (function_name == "equals" || function_name == "isNotDistinctFrom" || is_asof_join_inequality)
|
||||
{
|
||||
const auto * left_child = join_expressions_actions_node->children.at(0);
|
||||
const auto * right_child = join_expressions_actions_node->children.at(1);
|
||||
@ -253,7 +253,8 @@ void buildJoinClause(ActionsDAGPtr join_expression_dag,
|
||||
}
|
||||
else
|
||||
{
|
||||
join_clause.addKey(left_key, right_key);
|
||||
bool null_safe_comparison = function_name == "isNotDistinctFrom";
|
||||
join_clause.addKey(left_key, right_key, null_safe_comparison);
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -474,6 +475,24 @@ JoinClausesAndActions buildJoinClausesAndActions(const ColumnsWithTypeAndName &
|
||||
right_key_node = &join_expression_actions->addCast(*right_key_node, common_type, {});
|
||||
}
|
||||
|
||||
if (join_clause.isNullsafeCompareKey(i) && left_key_node->result_type->isNullable() && right_key_node->result_type->isNullable())
|
||||
{
|
||||
/**
|
||||
* In case of null-safe comparison (a IS NOT DISTINCT FROM b),
|
||||
* we need to wrap keys with a non-nullable type.
|
||||
* The type `tuple` can be used for this purpose,
|
||||
* because the value tuple(NULL) is not NULL itself (moreover, it has type Tuple(Nullable(T)), which is not Nullable).
|
||||
* Thus, join algorithm will match keys with values tuple(NULL).
|
||||
* Example:
|
||||
* SELECT * FROM t1 JOIN t2 ON t1.a <=> t2.b
|
||||
* This will be semantically transformed to:
|
||||
* SELECT * FROM t1 JOIN t2 ON tuple(t1.a) == tuple(t2.b)
|
||||
*/
|
||||
auto wrap_nullsafe_function = FunctionFactory::instance().get("tuple", planner_context->getQueryContext());
|
||||
left_key_node = &join_expression_actions->addFunction(wrap_nullsafe_function, {left_key_node}, {});
|
||||
right_key_node = &join_expression_actions->addFunction(wrap_nullsafe_function, {right_key_node}, {});
|
||||
}
|
||||
|
||||
join_expression_actions->addOrReplaceInOutputs(*left_key_node);
|
||||
join_expression_actions->addOrReplaceInOutputs(*right_key_node);
|
||||
|
||||
|
@ -53,10 +53,12 @@ class JoinClause
|
||||
{
|
||||
public:
|
||||
/// Add keys
|
||||
void addKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node)
|
||||
void addKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node, bool null_safe_comparison = false)
|
||||
{
|
||||
left_key_nodes.emplace_back(left_key_node);
|
||||
right_key_nodes.emplace_back(right_key_node);
|
||||
if (null_safe_comparison)
|
||||
nullsafe_compare_key_indexes.emplace(left_key_nodes.size() - 1);
|
||||
}
|
||||
|
||||
void addASOFKey(const ActionsDAG::Node * left_key_node, const ActionsDAG::Node * right_key_node, ASOFJoinInequality asof_inequality)
|
||||
@ -97,6 +99,11 @@ public:
|
||||
return right_key_nodes;
|
||||
}
|
||||
|
||||
bool isNullsafeCompareKey(size_t idx) const
|
||||
{
|
||||
return nullsafe_compare_key_indexes.contains(idx);
|
||||
}
|
||||
|
||||
/// Returns true if JOIN clause has ASOF conditions, false otherwise
|
||||
bool hasASOF() const
|
||||
{
|
||||
@ -147,6 +154,8 @@ private:
|
||||
|
||||
ActionsDAG::NodeRawConstPtrs left_filter_condition_nodes;
|
||||
ActionsDAG::NodeRawConstPtrs right_filter_condition_nodes;
|
||||
|
||||
std::unordered_set<size_t> nullsafe_compare_key_indexes;
|
||||
};
|
||||
|
||||
using JoinClauses = std::vector<JoinClause>;
|
||||
|
@ -66,4 +66,3 @@
|
||||
01940_custom_tld_sharding_key
|
||||
02815_range_dict_no_direct_join
|
||||
02845_threads_count_in_distributed_queries
|
||||
02861_join_on_nullsafe_compare
|
||||
|
@ -647,6 +647,37 @@ join_algorithm = default, join_use_nulls = 0, t1 JOIN t4
|
||||
19 19 19 19
|
||||
\N 20 \N 0
|
||||
--
|
||||
\N 0 2 2
|
||||
\N 0 \N 4
|
||||
\N 0 6 6
|
||||
\N 0 \N 8
|
||||
\N 0 10 10
|
||||
\N 0 \N 12
|
||||
\N 0 14 14
|
||||
\N 0 \N 16
|
||||
\N 0 18 18
|
||||
\N 0 \N 20
|
||||
1 1 1 1
|
||||
\N 2 \N 0
|
||||
3 3 3 3
|
||||
\N 4 \N 0
|
||||
5 5 5 5
|
||||
\N 6 \N 0
|
||||
7 7 7 7
|
||||
\N 8 \N 0
|
||||
9 9 9 9
|
||||
\N 10 \N 0
|
||||
11 11 11 11
|
||||
\N 12 \N 0
|
||||
13 13 13 13
|
||||
\N 14 \N 0
|
||||
15 15 15 15
|
||||
\N 16 \N 0
|
||||
17 17 17 17
|
||||
\N 18 \N 0
|
||||
19 19 19 19
|
||||
\N 20 \N 0
|
||||
--
|
||||
1 42 420 1 1 43 430 1
|
||||
\N 42 420 2 \N 43 430 4
|
||||
\N 42 420 2 \N 43 430 8
|
||||
|
@ -30,28 +30,28 @@ SELECT 'join_algorithm = {{ join_algorithm }}, join_use_nulls = {{ join_use_null
|
||||
SELECT '--';
|
||||
|
||||
SELECT {{ t1 }}.a, {{ t1 }}.val, {{ t2 }}.a, {{ t2 }}.val FROM {{ t1 }} FULL JOIN {{ t2 }}
|
||||
ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a)
|
||||
ON {{ t1 }}.a <=> {{ t2 }}.a
|
||||
ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST
|
||||
;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }}
|
||||
ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) AND isNotDistinctFrom({{ t1 }}.b, {{ t2 }}.b)
|
||||
ON {{ t1 }}.a <=> {{ t2 }}.a AND {{ t1 }}.b <=> {{ t2 }}.b
|
||||
ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST
|
||||
;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }}
|
||||
ON {{ t1 }}.a == {{ t2 }}.a AND isNotDistinctFrom({{ t1 }}.b, {{ t2 }}.b)
|
||||
ON {{ t1 }}.a == {{ t2 }}.a AND {{ t1 }}.b <=> {{ t2 }}.b
|
||||
ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST
|
||||
;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT * FROM {{ t1 }} FULL JOIN {{ t2 }}
|
||||
ON isNotDistinctFrom({{ t1 }}.a, {{ t2 }}.a) AND {{ t1 }}.b == {{ t2 }}.b
|
||||
ON {{ t1 }}.a <=> {{ t2 }}.a AND {{ t1 }}.b == {{ t2 }}.b
|
||||
ORDER BY {{ t1 }}.val NULLS FIRST, {{ t2 }}.val NULLS FIRST
|
||||
;
|
||||
|
||||
@ -62,7 +62,14 @@ SELECT '--';
|
||||
SET join_use_nulls = 0;
|
||||
SET join_algorithm = 'hash';
|
||||
SELECT t1.a, t1.val, t2.a, t2.val FROM t1 FULL JOIN t2
|
||||
ON isNotDistinctFrom(t1.a, t2.a) AND t1.b < 2 OR t1.a == t2.a
|
||||
ON t1.a <=> t2.a AND t1.b < 2 OR t1.a == t2.a
|
||||
ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST
|
||||
;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT t1.a, t1.val, t2.a, t2.val FROM t1 FULL JOIN t2
|
||||
ON t1.a IS NOT DISTINCT FROM t2.a AND t1.b < 2 OR t1.a == t2.a
|
||||
ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST
|
||||
;
|
||||
|
||||
@ -76,7 +83,7 @@ SET join_use_nulls = 1;
|
||||
SELECT *
|
||||
FROM (SELECT a, 42 as `__wrapNullsafe(a)`, 420 as `tuple(a)`, val FROM t1) t1
|
||||
JOIN (SELECT a, 43 as `__wrapNullsafe(t2.a)`, 430 as `tuple(t2.a)`, val FROM t2) t2
|
||||
ON isNotDistinctFrom(t1.a, t2.a)
|
||||
ON t1.a <=> t2.a
|
||||
ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST
|
||||
LIMIT 10;
|
||||
|
||||
@ -85,7 +92,7 @@ SELECT '--';
|
||||
SELECT a, 42 as `__wrapNullsafe(a)`, 420 as `tuple(a)`, val, t2.a, 43 as `__wrapNullsafe(t2.a)`, 430 as `tuple(t2.a)`, t2.val
|
||||
FROM (SELECT a, val, 111 as `__wrapNullsafe(a)_0` FROM t1) t1
|
||||
JOIN (SELECT a, val, 111 as `__wrapNullsafe(t2.a)_0` FROM t2) t2
|
||||
ON isNotDistinctFrom(t1.a, t2.a)
|
||||
ON t1.a <=> t2.a
|
||||
ORDER BY t1.val NULLS FIRST, t2.val NULLS FIRST
|
||||
LIMIT 10;
|
||||
|
||||
@ -99,3 +106,5 @@ SELECT * FROM t1 JOIN t2 ON isNotDistinctFrom(t1.a, t2.a, t2.b); -- { serverErro
|
||||
|
||||
SELECT isNotDistinctFrom(a) from t1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
|
||||
SELECT isNotDistinctFrom(a, b) from t1; -- { serverError NOT_IMPLEMENTED }
|
||||
SELECT a <=> b from t1; -- { serverError NOT_IMPLEMENTED }
|
||||
SELECT a IS NOT DISTINCT FROM b from t1; -- { serverError NOT_IMPLEMENTED }
|
||||
|
Loading…
Reference in New Issue
Block a user