review fix

This commit is contained in:
vdimir 2024-11-04 10:36:30 +00:00
parent a80e11f21b
commit 08ec69736f
No known key found for this signature in database
GPG Key ID: 6EE4CE2BEDC51862
8 changed files with 83 additions and 75 deletions

View File

@ -173,18 +173,7 @@ Result:
└───┴────┴─────┘ └───┴────┴─────┘
``` ```
## [experimental] Join with inequality conditions for columns from different tables ## Join with inequality conditions for columns from different tables
:::note
This feature is experimental. To use it, set `allow_experimental_join_condition` to 1 in your configuration files or by using the `SET` command:
```sql
SET allow_experimental_join_condition=1
```
Otherwise, you'll get `INVALID_JOIN_ON_EXPRESSION`.
:::
Clickhouse currently supports `ALL/ANY/SEMI/ANTI INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. The inequality conditions are supported only for `hash` and `grace_hash` join algorithms. The inequality conditions are not supported with `join_use_nulls`. Clickhouse currently supports `ALL/ANY/SEMI/ANTI INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. The inequality conditions are supported only for `hash` and `grace_hash` join algorithms. The inequality conditions are not supported with `join_use_nulls`.

View File

@ -600,15 +600,15 @@ void buildTableExpressionsStackImpl(const QueryTreeNodePtr & join_tree_node, Que
} }
QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & query_node, QueryTreeNodes * out_nearest_query) QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & query_node, QueryTreeNodes * nearest_query)
{ {
QueryTreeNodes result; QueryTreeNodes result;
QueryTreeNodes nearest_query_tmp; QueryTreeNodes nearest_query_tmp;
if (out_nearest_query == nullptr) if (nearest_query == nullptr)
out_nearest_query = &nearest_query_tmp; nearest_query = &nearest_query_tmp;
buildTableExpressionsStackImpl(query_node->as<QueryNode>()->getJoinTree(), query_node, result, *out_nearest_query); buildTableExpressionsStackImpl(query_node->as<QueryNode>()->getJoinTree(), query_node, result, *nearest_query);
return result; return result;
} }

View File

@ -67,6 +67,9 @@ QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node,
QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_node); QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_node);
/** Build table expressions stack that consists from table, table function, query, union, join, array join from join tree. /** Build table expressions stack that consists from table, table function, query, union, join, array join from join tree.
* Parameters:
* query_node - query node that contains join tree.
* nearest_query - if not nullptr, then nearest query node will be stored in this variable.
* *
* Example: SELECT * FROM t1 INNER JOIN t2 INNER JOIN t3. * Example: SELECT * FROM t1 INNER JOIN t2 INNER JOIN t3.
* Result table expressions stack: * Result table expressions stack:
@ -75,8 +78,10 @@ QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_n
* 3. t1 INNER JOIN t2 * 3. t1 INNER JOIN t2
* 4. t2 * 4. t2
* 5. t1 * 5. t1
*
* Nearest query is the query itself in this case.
*/ */
QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & query_node, QueryTreeNodes * out_nearest_query = nullptr); QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & query_node, QueryTreeNodes * nearest_query = nullptr);
/** Assert that there are no function nodes with specified function name in node children. /** Assert that there are no function nodes with specified function name in node children.

View File

@ -5,6 +5,7 @@
#include <Interpreters/IJoin.h> #include <Interpreters/IJoin.h>
#include <Interpreters/ActionsDAG.h> #include <Interpreters/ActionsDAG.h>
#include <Interpreters/ExpressionActions.h> #include <Interpreters/ExpressionActions.h>
#include <Core/Joins.h>
namespace DB namespace DB
{ {
@ -168,4 +169,18 @@ private:
void setRightIndex(size_t right_pos, size_t result_position); void setRightIndex(size_t right_pos, size_t result_position);
}; };
/// Call the same func twice - for left arguments and then right arguments
template <typename Func, typename... Args>
void forJoinSides(Func && func, std::tuple<Args...> && left, std::tuple<Args...> && right) {
std::apply([&](auto &&... args)
{
func(JoinTableSide::Left, std::forward<decltype(args)>(args)...);
}, std::forward<std::tuple<Args...>>(left));
std::apply([&](auto &&... args)
{
func(JoinTableSide::Right, std::forward<decltype(args)>(args)...);
}, std::forward<std::tuple<Args...>>(right));
}
} }

View File

@ -1291,12 +1291,13 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
planner_context); planner_context);
const auto & left_pre_filters = join_clauses_and_actions.join_clauses[0].getLeftFilterConditionNodes(); const auto & left_pre_filters = join_clauses_and_actions.join_clauses[0].getLeftFilterConditionNodes();
if (!left_pre_filters.empty() && left_pre_filters.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only one left pre-filter condition node. Actual [{}]", fmt::join(left_pre_filters | std::views::transform([](const auto & node) { return node->result_name; }), ", "));
const auto & right_pre_filters = join_clauses_and_actions.join_clauses[0].getRightFilterConditionNodes(); const auto & right_pre_filters = join_clauses_and_actions.join_clauses[0].getRightFilterConditionNodes();
if (!right_pre_filters.empty() && right_pre_filters.size() != 1) forJoinSides([](JoinTableSide side, const auto & pre_filters)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only one right pre-filter condition node. Actual [{}]", fmt::join(right_pre_filters | std::views::transform([](const auto & node) { return node->result_name; }), ", ")); {
if (!pre_filters.empty() && pre_filters.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only one {} pre-filter condition node. Actual [{}]",
side, fmt::join(pre_filters | std::views::transform([](const auto & node) { return node->result_name; }), ", "));
}, std::make_tuple(std::ref(left_pre_filters)), std::make_tuple(std::ref(right_pre_filters)));
can_move_out_residuals = join_clauses_and_actions.join_clauses.size() == 1 can_move_out_residuals = join_clauses_and_actions.join_clauses.size() == 1
&& join_strictness == JoinStrictness::All && join_strictness == JoinStrictness::All
@ -1304,29 +1305,21 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
&& (right_pre_filters.empty() || FilterStep::canUseType(right_pre_filters[0]->result_type)) && (right_pre_filters.empty() || FilterStep::canUseType(right_pre_filters[0]->result_type))
&& (left_pre_filters.empty() || FilterStep::canUseType(left_pre_filters[0]->result_type)); && (left_pre_filters.empty() || FilterStep::canUseType(left_pre_filters[0]->result_type));
join_clauses_and_actions.left_join_expressions_actions.appendInputsForUnusedColumns(left_plan.getCurrentHeader()); forJoinSides([&](JoinTableSide, ActionsDAG & join_expressions_actions, QueryPlan & plan, const auto & pre_filters)
appendSetsFromActionsDAG(join_clauses_and_actions.left_join_expressions_actions, left_join_tree_query_plan.useful_sets); {
join_expressions_actions.appendInputsForUnusedColumns(left_plan.getCurrentHeader());
appendSetsFromActionsDAG(join_expressions_actions, left_join_tree_query_plan.useful_sets);
QueryPlanStepPtr left_join_expressions_actions_step; QueryPlanStepPtr join_expressions_actions_step;
if (can_move_out_residuals && !left_pre_filters.empty()) if (can_move_out_residuals && !left_pre_filters.empty())
left_join_expressions_actions_step = std::make_unique<FilterStep>(left_plan.getCurrentHeader(), std::move(join_clauses_and_actions.left_join_expressions_actions), left_pre_filters[0]->result_name, false); join_expressions_actions_step = std::make_unique<FilterStep>(plan.getCurrentHeader(), std::move(join_expressions_actions), pre_filters[0]->result_name, false);
else else
left_join_expressions_actions_step = std::make_unique<ExpressionStep>(left_plan.getCurrentHeader(), std::move(join_clauses_and_actions.left_join_expressions_actions)); join_expressions_actions_step = std::make_unique<ExpressionStep>(plan.getCurrentHeader(), std::move(join_expressions_actions));
left_join_expressions_actions_step->setStepDescription("JOIN actions"); join_expressions_actions_step->setStepDescription("JOIN actions");
left_plan.addStep(std::move(left_join_expressions_actions_step)); plan.addStep(std::move(join_expressions_actions_step));
}, std::make_tuple(std::ref(join_clauses_and_actions.left_join_expressions_actions), std::ref(left_plan), std::ref(left_pre_filters)),
join_clauses_and_actions.right_join_expressions_actions.appendInputsForUnusedColumns(right_plan.getCurrentHeader()); std::make_tuple(std::ref(join_clauses_and_actions.right_join_expressions_actions), std::ref(right_plan), std::ref(right_pre_filters)));
appendSetsFromActionsDAG(join_clauses_and_actions.right_join_expressions_actions, right_join_tree_query_plan.useful_sets);
QueryPlanStepPtr right_join_expressions_actions_step;
if (can_move_out_residuals && !right_pre_filters.empty())
right_join_expressions_actions_step = std::make_unique<FilterStep>(right_plan.getCurrentHeader(), std::move(join_clauses_and_actions.right_join_expressions_actions), right_pre_filters[0]->result_name, false);
else
right_join_expressions_actions_step = std::make_unique<ExpressionStep>(right_plan.getCurrentHeader(), std::move(join_clauses_and_actions.right_join_expressions_actions));
right_join_expressions_actions_step->setStepDescription("JOIN actions");
right_plan.addStep(std::move(right_join_expressions_actions_step));
} }
std::unordered_map<ColumnIdentifier, DataTypePtr> left_plan_column_name_to_cast_type; std::unordered_map<ColumnIdentifier, DataTypePtr> left_plan_column_name_to_cast_type;
@ -1460,6 +1453,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
if (join_clause_key_nodes_size == 0 && !can_move_out_residuals) if (join_clause_key_nodes_size == 0 && !can_move_out_residuals)
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot determine join keys in {}", join_node.formatASTForErrorMessage()); throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot determine join keys in {}", join_node.formatASTForErrorMessage());
/// If there are no keys, but only conditions that cannot be used as keys, then it is a cross join.
/// Example: SELECT * FROM t1 JOIN t2 ON t1.x > t2.y
/// Same as: SELECT * FROM t1 CROSS JOIN t2 WHERE t1.x > t2.y
if (join_clause_key_nodes_size == 0 && can_move_out_residuals) if (join_clause_key_nodes_size == 0 && can_move_out_residuals)
{ {
join_kind = JoinKind::Cross; join_kind = JoinKind::Cross;
@ -1531,15 +1527,16 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
} }
} }
if (!can_move_out_residuals && join_clauses_and_actions.mixed_join_expressions_actions) if (!can_move_out_residuals && join_clauses_and_actions.residual_join_expressions_actions)
{ {
/// Let join algorithm handle residual conditions
ExpressionActionsPtr & mixed_join_expression = table_join->getMixedJoinExpression(); ExpressionActionsPtr & mixed_join_expression = table_join->getMixedJoinExpression();
mixed_join_expression = std::make_shared<ExpressionActions>( mixed_join_expression = std::make_shared<ExpressionActions>(
std::move(*join_clauses_and_actions.mixed_join_expressions_actions), std::move(*join_clauses_and_actions.residual_join_expressions_actions),
ExpressionActionsSettings::fromContext(planner_context->getQueryContext())); ExpressionActionsSettings::fromContext(planner_context->getQueryContext()));
appendSetsFromActionsDAG(mixed_join_expression->getActionsDAG(), left_join_tree_query_plan.useful_sets); appendSetsFromActionsDAG(mixed_join_expression->getActionsDAG(), left_join_tree_query_plan.useful_sets);
join_clauses_and_actions.mixed_join_expressions_actions.reset(); join_clauses_and_actions.residual_join_expressions_actions.reset();
} }
} }
else if (join_node.isUsingJoinExpression()) else if (join_node.isUsingJoinExpression())
@ -1688,26 +1685,28 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
result_plan.unitePlans(std::move(join_step), {std::move(plans)}); result_plan.unitePlans(std::move(join_step), {std::move(plans)});
} }
if (join_clauses_and_actions.mixed_join_expressions_actions) /// If residuals were not moved to JOIN algorithm,
/// we need to process add then as WHERE condition after JOIN
if (join_clauses_and_actions.residual_join_expressions_actions)
{ {
auto outputs = join_clauses_and_actions.mixed_join_expressions_actions->getOutputs(); auto outputs = join_clauses_and_actions.residual_join_expressions_actions->getOutputs();
if (outputs.size() != 1) if (outputs.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 output column in JOIN actions, got {}", throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 output column in JOIN actions, got {}",
join_clauses_and_actions.mixed_join_expressions_actions->dumpDAG()); join_clauses_and_actions.residual_join_expressions_actions->dumpDAG());
join_clauses_and_actions.mixed_join_expressions_actions->appendInputsForUnusedColumns(result_plan.getCurrentHeader()); join_clauses_and_actions.residual_join_expressions_actions->appendInputsForUnusedColumns(result_plan.getCurrentHeader());
for (const auto * input_node : join_clauses_and_actions.mixed_join_expressions_actions->getInputs()) for (const auto * input_node : join_clauses_and_actions.residual_join_expressions_actions->getInputs())
join_clauses_and_actions.mixed_join_expressions_actions->addOrReplaceInOutputs(*input_node); join_clauses_and_actions.residual_join_expressions_actions->addOrReplaceInOutputs(*input_node);
appendSetsFromActionsDAG(*join_clauses_and_actions.mixed_join_expressions_actions, left_join_tree_query_plan.useful_sets); appendSetsFromActionsDAG(*join_clauses_and_actions.residual_join_expressions_actions, left_join_tree_query_plan.useful_sets);
auto filter_step = std::make_unique<FilterStep>(result_plan.getCurrentHeader(), auto filter_step = std::make_unique<FilterStep>(result_plan.getCurrentHeader(),
std::move(*join_clauses_and_actions.mixed_join_expressions_actions), std::move(*join_clauses_and_actions.residual_join_expressions_actions),
outputs[0]->result_name, outputs[0]->result_name,
/* remove_column = */ false); /// Unused columns will be removed by next step /* remove_column = */ false); /// Unused columns will be removed by next step
filter_step->setStepDescription("Residual JOIN filter"); filter_step->setStepDescription("Residual JOIN filter");
result_plan.addStep(std::move(filter_step)); result_plan.addStep(std::move(filter_step));
join_clauses_and_actions.mixed_join_expressions_actions.reset(); join_clauses_and_actions.residual_join_expressions_actions.reset();
} }
ActionsDAG drop_unused_columns_after_join_actions_dag(result_plan.getCurrentHeader().getColumnsWithTypeAndName()); ActionsDAG drop_unused_columns_after_join_actions_dag(result_plan.getCurrentHeader().getColumnsWithTypeAndName());

View File

@ -378,8 +378,8 @@ void buildJoinClause(
{ {
throw Exception( throw Exception(
ErrorCodes::INVALID_JOIN_ON_EXPRESSION, ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"OUTER JOIN ON expression {} contains column from left and right table, which is not supported with join_use_nulls", "{} JOIN ON expression {} contains column from left and right table, which is not supported with `join_use_nulls`",
join_expression->formatASTForErrorMessage()); toString(join_node.getKind()), join_expression->formatASTForErrorMessage());
} }
} }
} }
@ -474,7 +474,7 @@ JoinClausesAndActions buildJoinClausesAndActions(
join_right_table_expressions, join_right_table_expressions,
join_node, join_node,
result.join_clauses.back()); result.join_clauses.back());
has_residual_filters |= !result.join_clauses.back().getMixedFilterConditionNodes().empty(); has_residual_filters |= !result.join_clauses.back().getResidualFilterConditionNodes().empty();
} }
} }
else else
@ -491,7 +491,7 @@ JoinClausesAndActions buildJoinClausesAndActions(
join_right_table_expressions, join_right_table_expressions,
join_node, join_node,
result.join_clauses.back()); result.join_clauses.back());
has_residual_filters |= !result.join_clauses.back().getMixedFilterConditionNodes().empty(); has_residual_filters |= !result.join_clauses.back().getResidualFilterConditionNodes().empty();
} }
auto and_function = FunctionFactory::instance().get("and", planner_context->getQueryContext()); auto and_function = FunctionFactory::instance().get("and", planner_context->getQueryContext());
@ -618,29 +618,29 @@ JoinClausesAndActions buildJoinClausesAndActions(
/// So, for each column, we recalculate the value of the whole expression from JOIN ON to check if rows should be joined. /// So, for each column, we recalculate the value of the whole expression from JOIN ON to check if rows should be joined.
if (result.join_clauses.size() > 1) if (result.join_clauses.size() > 1)
{ {
ActionsDAG mixed_join_expressions_actions(result_relation_columns); ActionsDAG residual_join_expressions_actions(result_relation_columns);
PlannerActionsVisitor join_expression_visitor(planner_context); PlannerActionsVisitor join_expression_visitor(planner_context);
auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(mixed_join_expressions_actions, join_expression); auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(residual_join_expressions_actions, join_expression);
if (join_expression_dag_node_raw_pointers.size() != 1) if (join_expression_dag_node_raw_pointers.size() != 1)
throw Exception( throw Exception(
ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", join_node.formatASTForErrorMessage()); ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", join_node.formatASTForErrorMessage());
mixed_join_expressions_actions.addOrReplaceInOutputs(*join_expression_dag_node_raw_pointers[0]); residual_join_expressions_actions.addOrReplaceInOutputs(*join_expression_dag_node_raw_pointers[0]);
Names required_names{join_expression_dag_node_raw_pointers[0]->result_name}; Names required_names{join_expression_dag_node_raw_pointers[0]->result_name};
mixed_join_expressions_actions.removeUnusedActions(required_names); residual_join_expressions_actions.removeUnusedActions(required_names);
result.mixed_join_expressions_actions = std::move(mixed_join_expressions_actions); result.residual_join_expressions_actions = std::move(residual_join_expressions_actions);
} }
else else
{ {
const auto & join_clause = result.join_clauses.front(); const auto & join_clause = result.join_clauses.front();
const auto & mixed_filter_condition_nodes = join_clause.getMixedFilterConditionNodes(); const auto & residual_filter_condition_nodes = join_clause.getResidualFilterConditionNodes();
auto mixed_join_expressions_actions = ActionsDAG::buildFilterActionsDAG(mixed_filter_condition_nodes, {}, true); auto residual_join_expressions_actions = ActionsDAG::buildFilterActionsDAG(residual_filter_condition_nodes, {}, true);
result.mixed_join_expressions_actions = std::move(mixed_join_expressions_actions); result.residual_join_expressions_actions = std::move(residual_join_expressions_actions);
} }
auto outputs = result.mixed_join_expressions_actions->getOutputs(); auto outputs = result.residual_join_expressions_actions->getOutputs();
if (outputs.size() != 1) if (outputs.size() != 1)
{ {
throw Exception(ErrorCodes::LOGICAL_ERROR, "Only one output is expected, got: {}", result.mixed_join_expressions_actions->dumpDAG()); throw Exception(ErrorCodes::LOGICAL_ERROR, "Only one output is expected, got: {}", result.residual_join_expressions_actions->dumpDAG());
} }
auto output_type = removeNullable(outputs[0]->result_type); auto output_type = removeNullable(outputs[0]->result_type);
WhichDataType which_type(output_type); WhichDataType which_type(output_type);
@ -648,8 +648,8 @@ JoinClausesAndActions buildJoinClausesAndActions(
{ {
DataTypePtr uint8_ty = std::make_shared<DataTypeUInt8>(); DataTypePtr uint8_ty = std::make_shared<DataTypeUInt8>();
auto true_col = ColumnWithTypeAndName(uint8_ty->createColumnConst(1, 1), uint8_ty, "true"); auto true_col = ColumnWithTypeAndName(uint8_ty->createColumnConst(1, 1), uint8_ty, "true");
const auto * true_node = &result.mixed_join_expressions_actions->addColumn(true_col); const auto * true_node = &result.residual_join_expressions_actions->addColumn(true_col);
result.mixed_join_expressions_actions = ActionsDAG::buildFilterActionsDAG({outputs[0], true_node}); result.residual_join_expressions_actions = ActionsDAG::buildFilterActionsDAG({outputs[0], true_node});
} }
} }

View File

@ -140,7 +140,7 @@ public:
return right_filter_condition_nodes; return right_filter_condition_nodes;
} }
ActionsDAG::NodeRawConstPtrs & getMixedFilterConditionNodes() ActionsDAG::NodeRawConstPtrs & getResidualFilterConditionNodes()
{ {
return residual_filter_condition_nodes; return residual_filter_condition_nodes;
} }
@ -150,7 +150,7 @@ public:
residual_filter_condition_nodes.push_back(condition_node); residual_filter_condition_nodes.push_back(condition_node);
} }
const ActionsDAG::NodeRawConstPtrs & getMixedFilterConditionNodes() const const ActionsDAG::NodeRawConstPtrs & getResidualFilterConditionNodes() const
{ {
return residual_filter_condition_nodes; return residual_filter_condition_nodes;
} }
@ -190,7 +190,7 @@ struct JoinClausesAndActions
ActionsDAG right_join_expressions_actions; ActionsDAG right_join_expressions_actions;
/// Originally used for inequal join. it's the total join expression. /// Originally used for inequal join. it's the total join expression.
/// If there is no inequal join conditions, it's null. /// If there is no inequal join conditions, it's null.
std::optional<ActionsDAG> mixed_join_expressions_actions; std::optional<ActionsDAG> residual_join_expressions_actions;
}; };
/** Calculate join clauses and actions for JOIN ON section. /** Calculate join clauses and actions for JOIN ON section.

View File

@ -17,7 +17,7 @@ namespace ErrorCodes
bool FilterTransform::canUseType(const DataTypePtr & filter_type) bool FilterTransform::canUseType(const DataTypePtr & filter_type)
{ {
return filter_type->onlyNull() || isUInt8(removeNullable(removeLowCardinality(filter_type))); return filter_type->onlyNull() || isUInt8(removeLowCardinalityAndNullable(filter_type));
} }
Block FilterTransform::transformHeader( Block FilterTransform::transformHeader(