review fix

This commit is contained in:
vdimir 2024-11-04 10:36:30 +00:00
parent a80e11f21b
commit 08ec69736f
No known key found for this signature in database
GPG Key ID: 6EE4CE2BEDC51862
8 changed files with 83 additions and 75 deletions

View File

@ -173,18 +173,7 @@ Result:
└───┴────┴─────┘
```
## [experimental] Join with inequality conditions for columns from different tables
:::note
This feature is experimental. To use it, set `allow_experimental_join_condition` to 1 in your configuration files or by using the `SET` command:
```sql
SET allow_experimental_join_condition=1
```
Otherwise, you'll get `INVALID_JOIN_ON_EXPRESSION`.
:::
## Join with inequality conditions for columns from different tables
Clickhouse currently supports `ALL/ANY/SEMI/ANTI INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. The inequality conditions are supported only for `hash` and `grace_hash` join algorithms. The inequality conditions are not supported with `join_use_nulls`.

View File

@ -600,15 +600,15 @@ void buildTableExpressionsStackImpl(const QueryTreeNodePtr & join_tree_node, Que
}
QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & query_node, QueryTreeNodes * out_nearest_query)
QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & query_node, QueryTreeNodes * nearest_query)
{
QueryTreeNodes result;
QueryTreeNodes nearest_query_tmp;
if (out_nearest_query == nullptr)
out_nearest_query = &nearest_query_tmp;
if (nearest_query == nullptr)
nearest_query = &nearest_query_tmp;
buildTableExpressionsStackImpl(query_node->as<QueryNode>()->getJoinTree(), query_node, result, *out_nearest_query);
buildTableExpressionsStackImpl(query_node->as<QueryNode>()->getJoinTree(), query_node, result, *nearest_query);
return result;
}

View File

@ -67,6 +67,9 @@ QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node,
QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_node);
/** Build table expressions stack that consists from table, table function, query, union, join, array join from join tree.
* Parameters:
* query_node - query node that contains join tree.
* nearest_query - if not nullptr, then nearest query node will be stored in this variable.
*
* Example: SELECT * FROM t1 INNER JOIN t2 INNER JOIN t3.
* Result table expressions stack:
@ -75,8 +78,10 @@ QueryTreeNodePtr extractLeftTableExpression(const QueryTreeNodePtr & join_tree_n
* 3. t1 INNER JOIN t2
* 4. t2
* 5. t1
*
* Nearest query is the query itself in this case.
*/
QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & query_node, QueryTreeNodes * out_nearest_query = nullptr);
QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & query_node, QueryTreeNodes * nearest_query = nullptr);
/** Assert that there are no function nodes with specified function name in node children.

View File

@ -5,6 +5,7 @@
#include <Interpreters/IJoin.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ExpressionActions.h>
#include <Core/Joins.h>
namespace DB
{
@ -168,4 +169,18 @@ private:
void setRightIndex(size_t right_pos, size_t result_position);
};
/// Call the same func twice - for left arguments and then right arguments
template <typename Func, typename... Args>
void forJoinSides(Func && func, std::tuple<Args...> && left, std::tuple<Args...> && right) {
std::apply([&](auto &&... args)
{
func(JoinTableSide::Left, std::forward<decltype(args)>(args)...);
}, std::forward<std::tuple<Args...>>(left));
std::apply([&](auto &&... args)
{
func(JoinTableSide::Right, std::forward<decltype(args)>(args)...);
}, std::forward<std::tuple<Args...>>(right));
}
}

View File

@ -1291,12 +1291,13 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
planner_context);
const auto & left_pre_filters = join_clauses_and_actions.join_clauses[0].getLeftFilterConditionNodes();
if (!left_pre_filters.empty() && left_pre_filters.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only one left pre-filter condition node. Actual [{}]", fmt::join(left_pre_filters | std::views::transform([](const auto & node) { return node->result_name; }), ", "));
const auto & right_pre_filters = join_clauses_and_actions.join_clauses[0].getRightFilterConditionNodes();
if (!right_pre_filters.empty() && right_pre_filters.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only one right pre-filter condition node. Actual [{}]", fmt::join(right_pre_filters | std::views::transform([](const auto & node) { return node->result_name; }), ", "));
forJoinSides([](JoinTableSide side, const auto & pre_filters)
{
if (!pre_filters.empty() && pre_filters.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected only one {} pre-filter condition node. Actual [{}]",
side, fmt::join(pre_filters | std::views::transform([](const auto & node) { return node->result_name; }), ", "));
}, std::make_tuple(std::ref(left_pre_filters)), std::make_tuple(std::ref(right_pre_filters)));
can_move_out_residuals = join_clauses_and_actions.join_clauses.size() == 1
&& join_strictness == JoinStrictness::All
@ -1304,29 +1305,21 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
&& (right_pre_filters.empty() || FilterStep::canUseType(right_pre_filters[0]->result_type))
&& (left_pre_filters.empty() || FilterStep::canUseType(left_pre_filters[0]->result_type));
join_clauses_and_actions.left_join_expressions_actions.appendInputsForUnusedColumns(left_plan.getCurrentHeader());
appendSetsFromActionsDAG(join_clauses_and_actions.left_join_expressions_actions, left_join_tree_query_plan.useful_sets);
forJoinSides([&](JoinTableSide, ActionsDAG & join_expressions_actions, QueryPlan & plan, const auto & pre_filters)
{
join_expressions_actions.appendInputsForUnusedColumns(left_plan.getCurrentHeader());
appendSetsFromActionsDAG(join_expressions_actions, left_join_tree_query_plan.useful_sets);
QueryPlanStepPtr left_join_expressions_actions_step;
if (can_move_out_residuals && !left_pre_filters.empty())
left_join_expressions_actions_step = std::make_unique<FilterStep>(left_plan.getCurrentHeader(), std::move(join_clauses_and_actions.left_join_expressions_actions), left_pre_filters[0]->result_name, false);
else
left_join_expressions_actions_step = std::make_unique<ExpressionStep>(left_plan.getCurrentHeader(), std::move(join_clauses_and_actions.left_join_expressions_actions));
QueryPlanStepPtr join_expressions_actions_step;
if (can_move_out_residuals && !left_pre_filters.empty())
join_expressions_actions_step = std::make_unique<FilterStep>(plan.getCurrentHeader(), std::move(join_expressions_actions), pre_filters[0]->result_name, false);
else
join_expressions_actions_step = std::make_unique<ExpressionStep>(plan.getCurrentHeader(), std::move(join_expressions_actions));
left_join_expressions_actions_step->setStepDescription("JOIN actions");
left_plan.addStep(std::move(left_join_expressions_actions_step));
join_clauses_and_actions.right_join_expressions_actions.appendInputsForUnusedColumns(right_plan.getCurrentHeader());
appendSetsFromActionsDAG(join_clauses_and_actions.right_join_expressions_actions, right_join_tree_query_plan.useful_sets);
QueryPlanStepPtr right_join_expressions_actions_step;
if (can_move_out_residuals && !right_pre_filters.empty())
right_join_expressions_actions_step = std::make_unique<FilterStep>(right_plan.getCurrentHeader(), std::move(join_clauses_and_actions.right_join_expressions_actions), right_pre_filters[0]->result_name, false);
else
right_join_expressions_actions_step = std::make_unique<ExpressionStep>(right_plan.getCurrentHeader(), std::move(join_clauses_and_actions.right_join_expressions_actions));
right_join_expressions_actions_step->setStepDescription("JOIN actions");
right_plan.addStep(std::move(right_join_expressions_actions_step));
join_expressions_actions_step->setStepDescription("JOIN actions");
plan.addStep(std::move(join_expressions_actions_step));
}, std::make_tuple(std::ref(join_clauses_and_actions.left_join_expressions_actions), std::ref(left_plan), std::ref(left_pre_filters)),
std::make_tuple(std::ref(join_clauses_and_actions.right_join_expressions_actions), std::ref(right_plan), std::ref(right_pre_filters)));
}
std::unordered_map<ColumnIdentifier, DataTypePtr> left_plan_column_name_to_cast_type;
@ -1460,6 +1453,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
if (join_clause_key_nodes_size == 0 && !can_move_out_residuals)
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot determine join keys in {}", join_node.formatASTForErrorMessage());
/// If there are no keys, but only conditions that cannot be used as keys, then it is a cross join.
/// Example: SELECT * FROM t1 JOIN t2 ON t1.x > t2.y
/// Same as: SELECT * FROM t1 CROSS JOIN t2 WHERE t1.x > t2.y
if (join_clause_key_nodes_size == 0 && can_move_out_residuals)
{
join_kind = JoinKind::Cross;
@ -1531,15 +1527,16 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
}
}
if (!can_move_out_residuals && join_clauses_and_actions.mixed_join_expressions_actions)
if (!can_move_out_residuals && join_clauses_and_actions.residual_join_expressions_actions)
{
/// Let join algorithm handle residual conditions
ExpressionActionsPtr & mixed_join_expression = table_join->getMixedJoinExpression();
mixed_join_expression = std::make_shared<ExpressionActions>(
std::move(*join_clauses_and_actions.mixed_join_expressions_actions),
std::move(*join_clauses_and_actions.residual_join_expressions_actions),
ExpressionActionsSettings::fromContext(planner_context->getQueryContext()));
appendSetsFromActionsDAG(mixed_join_expression->getActionsDAG(), left_join_tree_query_plan.useful_sets);
join_clauses_and_actions.mixed_join_expressions_actions.reset();
join_clauses_and_actions.residual_join_expressions_actions.reset();
}
}
else if (join_node.isUsingJoinExpression())
@ -1688,26 +1685,28 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
result_plan.unitePlans(std::move(join_step), {std::move(plans)});
}
if (join_clauses_and_actions.mixed_join_expressions_actions)
/// If residuals were not moved to JOIN algorithm,
/// we need to process add then as WHERE condition after JOIN
if (join_clauses_and_actions.residual_join_expressions_actions)
{
auto outputs = join_clauses_and_actions.mixed_join_expressions_actions->getOutputs();
auto outputs = join_clauses_and_actions.residual_join_expressions_actions->getOutputs();
if (outputs.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 output column in JOIN actions, got {}",
join_clauses_and_actions.mixed_join_expressions_actions->dumpDAG());
join_clauses_and_actions.residual_join_expressions_actions->dumpDAG());
join_clauses_and_actions.mixed_join_expressions_actions->appendInputsForUnusedColumns(result_plan.getCurrentHeader());
for (const auto * input_node : join_clauses_and_actions.mixed_join_expressions_actions->getInputs())
join_clauses_and_actions.mixed_join_expressions_actions->addOrReplaceInOutputs(*input_node);
join_clauses_and_actions.residual_join_expressions_actions->appendInputsForUnusedColumns(result_plan.getCurrentHeader());
for (const auto * input_node : join_clauses_and_actions.residual_join_expressions_actions->getInputs())
join_clauses_and_actions.residual_join_expressions_actions->addOrReplaceInOutputs(*input_node);
appendSetsFromActionsDAG(*join_clauses_and_actions.mixed_join_expressions_actions, left_join_tree_query_plan.useful_sets);
appendSetsFromActionsDAG(*join_clauses_and_actions.residual_join_expressions_actions, left_join_tree_query_plan.useful_sets);
auto filter_step = std::make_unique<FilterStep>(result_plan.getCurrentHeader(),
std::move(*join_clauses_and_actions.mixed_join_expressions_actions),
std::move(*join_clauses_and_actions.residual_join_expressions_actions),
outputs[0]->result_name,
/* remove_column = */ false); /// Unused columns will be removed by next step
filter_step->setStepDescription("Residual JOIN filter");
result_plan.addStep(std::move(filter_step));
join_clauses_and_actions.mixed_join_expressions_actions.reset();
join_clauses_and_actions.residual_join_expressions_actions.reset();
}
ActionsDAG drop_unused_columns_after_join_actions_dag(result_plan.getCurrentHeader().getColumnsWithTypeAndName());

View File

@ -378,8 +378,8 @@ void buildJoinClause(
{
throw Exception(
ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
"OUTER JOIN ON expression {} contains column from left and right table, which is not supported with join_use_nulls",
join_expression->formatASTForErrorMessage());
"{} JOIN ON expression {} contains column from left and right table, which is not supported with `join_use_nulls`",
toString(join_node.getKind()), join_expression->formatASTForErrorMessage());
}
}
}
@ -474,7 +474,7 @@ JoinClausesAndActions buildJoinClausesAndActions(
join_right_table_expressions,
join_node,
result.join_clauses.back());
has_residual_filters |= !result.join_clauses.back().getMixedFilterConditionNodes().empty();
has_residual_filters |= !result.join_clauses.back().getResidualFilterConditionNodes().empty();
}
}
else
@ -491,7 +491,7 @@ JoinClausesAndActions buildJoinClausesAndActions(
join_right_table_expressions,
join_node,
result.join_clauses.back());
has_residual_filters |= !result.join_clauses.back().getMixedFilterConditionNodes().empty();
has_residual_filters |= !result.join_clauses.back().getResidualFilterConditionNodes().empty();
}
auto and_function = FunctionFactory::instance().get("and", planner_context->getQueryContext());
@ -618,29 +618,29 @@ JoinClausesAndActions buildJoinClausesAndActions(
/// So, for each column, we recalculate the value of the whole expression from JOIN ON to check if rows should be joined.
if (result.join_clauses.size() > 1)
{
ActionsDAG mixed_join_expressions_actions(result_relation_columns);
ActionsDAG residual_join_expressions_actions(result_relation_columns);
PlannerActionsVisitor join_expression_visitor(planner_context);
auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(mixed_join_expressions_actions, join_expression);
auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(residual_join_expressions_actions, join_expression);
if (join_expression_dag_node_raw_pointers.size() != 1)
throw Exception(
ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", join_node.formatASTForErrorMessage());
mixed_join_expressions_actions.addOrReplaceInOutputs(*join_expression_dag_node_raw_pointers[0]);
residual_join_expressions_actions.addOrReplaceInOutputs(*join_expression_dag_node_raw_pointers[0]);
Names required_names{join_expression_dag_node_raw_pointers[0]->result_name};
mixed_join_expressions_actions.removeUnusedActions(required_names);
result.mixed_join_expressions_actions = std::move(mixed_join_expressions_actions);
residual_join_expressions_actions.removeUnusedActions(required_names);
result.residual_join_expressions_actions = std::move(residual_join_expressions_actions);
}
else
{
const auto & join_clause = result.join_clauses.front();
const auto & mixed_filter_condition_nodes = join_clause.getMixedFilterConditionNodes();
auto mixed_join_expressions_actions = ActionsDAG::buildFilterActionsDAG(mixed_filter_condition_nodes, {}, true);
result.mixed_join_expressions_actions = std::move(mixed_join_expressions_actions);
const auto & residual_filter_condition_nodes = join_clause.getResidualFilterConditionNodes();
auto residual_join_expressions_actions = ActionsDAG::buildFilterActionsDAG(residual_filter_condition_nodes, {}, true);
result.residual_join_expressions_actions = std::move(residual_join_expressions_actions);
}
auto outputs = result.mixed_join_expressions_actions->getOutputs();
auto outputs = result.residual_join_expressions_actions->getOutputs();
if (outputs.size() != 1)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Only one output is expected, got: {}", result.mixed_join_expressions_actions->dumpDAG());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Only one output is expected, got: {}", result.residual_join_expressions_actions->dumpDAG());
}
auto output_type = removeNullable(outputs[0]->result_type);
WhichDataType which_type(output_type);
@ -648,8 +648,8 @@ JoinClausesAndActions buildJoinClausesAndActions(
{
DataTypePtr uint8_ty = std::make_shared<DataTypeUInt8>();
auto true_col = ColumnWithTypeAndName(uint8_ty->createColumnConst(1, 1), uint8_ty, "true");
const auto * true_node = &result.mixed_join_expressions_actions->addColumn(true_col);
result.mixed_join_expressions_actions = ActionsDAG::buildFilterActionsDAG({outputs[0], true_node});
const auto * true_node = &result.residual_join_expressions_actions->addColumn(true_col);
result.residual_join_expressions_actions = ActionsDAG::buildFilterActionsDAG({outputs[0], true_node});
}
}

View File

@ -140,7 +140,7 @@ public:
return right_filter_condition_nodes;
}
ActionsDAG::NodeRawConstPtrs & getMixedFilterConditionNodes()
ActionsDAG::NodeRawConstPtrs & getResidualFilterConditionNodes()
{
return residual_filter_condition_nodes;
}
@ -150,7 +150,7 @@ public:
residual_filter_condition_nodes.push_back(condition_node);
}
const ActionsDAG::NodeRawConstPtrs & getMixedFilterConditionNodes() const
const ActionsDAG::NodeRawConstPtrs & getResidualFilterConditionNodes() const
{
return residual_filter_condition_nodes;
}
@ -190,7 +190,7 @@ struct JoinClausesAndActions
ActionsDAG right_join_expressions_actions;
/// Originally used for inequal join. it's the total join expression.
/// If there is no inequal join conditions, it's null.
std::optional<ActionsDAG> mixed_join_expressions_actions;
std::optional<ActionsDAG> residual_join_expressions_actions;
};
/** Calculate join clauses and actions for JOIN ON section.

View File

@ -17,7 +17,7 @@ namespace ErrorCodes
bool FilterTransform::canUseType(const DataTypePtr & filter_type)
{
return filter_type->onlyNull() || isUInt8(removeNullable(removeLowCardinality(filter_type)));
return filter_type->onlyNull() || isUInt8(removeLowCardinalityAndNullable(filter_type));
}
Block FilterTransform::transformHeader(