mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
apply 27a2b19
This commit is contained in:
parent
7ee720ffb0
commit
6fa6c0261b
@ -164,12 +164,26 @@ Result:
|
||||
│ 4 │ -4 │ 4 │
|
||||
└───┴────┴─────┘
|
||||
```
|
||||
## Join with inequality conditions
|
||||
ClickHouse currently supports inner, left, right and full joins with inequality conditions, including conditions combined with the `OR` operator. You need to set `allow_experimental_analyzer = 1` and select the `hash` or `grace_hash` join algorithm.
|
||||
|
||||
## [experimental] Join with inequality conditions
|
||||
|
||||
:::note
|
||||
This feature is experimental. To use it, set `allow_experimental_join_condition` to 1 in your configuration files or by using the `SET` command:
|
||||
|
||||
```sql
|
||||
SET allow_experimental_join_condition=1
|
||||
```
|
||||
|
||||
Otherwise, you'll get `INVALID_JOIN_ON_EXPRESSION`.
|
||||
|
||||
:::
|
||||
|
||||
ClickHouse currently supports `ALL INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. Inequality conditions are supported only for the `hash` and `grace_hash` join algorithms, and they are not supported with `join_use_nulls`.
|
||||
|
||||
**Example**
|
||||
|
||||
Table `t1`:
|
||||
|
||||
```
|
||||
┌─key──┬─attr─┬─a─┬─b─┬─c─┐
|
||||
│ key1 │ a │ 1 │ 1 │ 2 │
|
||||
@ -183,6 +197,7 @@ Table `t1`:
|
||||
```
|
||||
|
||||
Table `t2`:
|
||||
|
||||
```
|
||||
┌─key──┬─attr─┬─a─┬─b─┬─c─┐
|
||||
│ key1 │ A │ 1 │ 2 │ 1 │
|
||||
@ -193,9 +208,11 @@ Table `t2`
|
||||
│ key4 │ F │ 1 │ 1 │ 1 │
|
||||
└──────┴──────┴───┴───┴───┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT t1.*, t2.* from t1 LEFT JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr);
|
||||
```
|
||||
|
||||
```
|
||||
key1 a 1 1 2 key1 B 2 1 2
|
||||
key1 a 1 1 2 key1 C 3 4 5
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include <stack>
|
||||
#include <base/sort.h>
|
||||
#include <Common/JSONBuilder.h>
|
||||
#include "ExpressionActions.h"
|
||||
#include <Core/SettingsEnums.h>
|
||||
|
||||
|
||||
@ -617,18 +616,14 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon
|
||||
res_column.column = action.node->function->execute(arguments, res_column.type, num_rows, dry_run);
|
||||
if (res_column.column->getDataType() != res_column.type->getColumnType())
|
||||
{
|
||||
WriteBufferFromOwnString out;
|
||||
for (const auto & arg : arguments)
|
||||
out << arg.dumpStructure() << ",";
|
||||
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Unexpected return type from {}. Expected {}. Got {}. Action:\n{},\ninput block structure:{}",
|
||||
action.node->function->getName(),
|
||||
res_column.type->getName(), //res_column.type->getColumnType(),
|
||||
res_column.column->getName(), //res_column.column->getDataType(),
|
||||
res_column.type->getName(),
|
||||
res_column.column->getName(),
|
||||
action.toString(),
|
||||
out.str());
|
||||
Block(arguments).dumpStructure());
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -1573,67 +1573,58 @@ ColumnPtr buildAdditionalFilter(
|
||||
}
|
||||
if (!executed_block)
|
||||
{
|
||||
WriteBufferFromOwnString buf;
|
||||
for (const auto & col : required_cols)
|
||||
{
|
||||
buf << col.name << ", ";
|
||||
}
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"required columns: {}. but not found any in left/right table. right table: {}, left table: {}",
|
||||
buf.str(),
|
||||
"required columns: [{}], but not found any in left/right table. right table: {}, left table: {}",
|
||||
required_cols.toString(),
|
||||
sample_right_block.dumpNames(),
|
||||
added_columns.left_block.dumpNames());
|
||||
}
|
||||
// Debug
|
||||
|
||||
for (const auto & col : executed_block.getColumnsWithTypeAndName())
|
||||
{
|
||||
if (!col.column || !col.type)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Null column in input block. {}", executed_block.dumpStructure());
|
||||
}
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure());
|
||||
}
|
||||
|
||||
added_columns.additional_filter_expression->execute(executed_block);
|
||||
return executed_block.getByPosition(0).column;
|
||||
|
||||
ColumnPtr result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst();
|
||||
executed_block.clear();
|
||||
|
||||
if (result_column->isNullable())
|
||||
{
|
||||
/// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros
|
||||
/// Trying to avoid copying data, since we are the only owner of the column.
|
||||
ColumnPtr mask_column = assert_cast<const ColumnNullable &>(*result_column).getNullMapColumnPtr();
|
||||
|
||||
MutableColumnPtr mutable_column;
|
||||
{
|
||||
ColumnPtr nested_column = assert_cast<const ColumnNullable &>(*result_column).getNestedColumnPtr();
|
||||
result_column.reset();
|
||||
mutable_column = IColumn::mutate(std::move(nested_column));
|
||||
}
|
||||
|
||||
auto & column_data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
|
||||
const auto & mask_column_data = assert_cast<const ColumnUInt8 &>(*mask_column).getData();
|
||||
for (size_t i = 0; i < column_data.size(); ++i)
|
||||
{
|
||||
if (mask_column_data[i])
|
||||
column_data[i] = 0;
|
||||
}
|
||||
return mutable_column;
|
||||
}
|
||||
return result_column;
|
||||
}
|
||||
|
||||
template <bool flag_per_row>
|
||||
void addFoundRowRefAll(
|
||||
const RowRefList & row_list,
|
||||
std::vector<RowRef> & selected_rows,
|
||||
IColumn::Offset & current_offset,
|
||||
KnownRowsHolder<flag_per_row> & known_rows [[maybe_unused]])
|
||||
/// Adapter class to pass into addFoundRowAll
|
||||
/// In joinRightColumnsWithAdditionalFilter we don't want to add rows directly into AddedColumns,
|
||||
/// because they need to be filtered by additional_filter_expression.
|
||||
class PreSelectedRows : public std::vector<RowRef>
|
||||
{
|
||||
if constexpr (flag_per_row)
|
||||
{
|
||||
std::unique_ptr<std::vector<KnownRowsHolder<true>::Type>> new_known_rows_ptr;
|
||||
for (auto it = row_list.begin(); it.ok(); ++it)
|
||||
{
|
||||
auto row_ref = std::make_pair(it->block, it->row_num);
|
||||
if (!known_rows.isKnown(row_ref))
|
||||
{
|
||||
selected_rows.emplace_back(row_ref.first, row_ref.second);
|
||||
++current_offset;
|
||||
if (!new_known_rows_ptr)
|
||||
{
|
||||
new_known_rows_ptr = std::make_unique<std::vector<KnownRowsHolder<true>::Type>>();
|
||||
}
|
||||
new_known_rows_ptr->push_back(row_ref);
|
||||
}
|
||||
}
|
||||
|
||||
if (new_known_rows_ptr)
|
||||
known_rows.add(std::cbegin(*new_known_rows_ptr), std::cend(*new_known_rows_ptr));
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto it = row_list.begin(); it.ok(); ++it)
|
||||
{
|
||||
selected_rows.emplace_back(it->block, it->row_num);
|
||||
++current_offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
public:
|
||||
void appendFromBlock(const Block & block, size_t row_num, bool /* has_default */) { this->emplace_back(&block, row_num); }
|
||||
};
|
||||
|
||||
/// First collect all matched row refs by join keys, then filter out the rows for which the additional filter expression is not true.
|
||||
template <
|
||||
@ -1666,7 +1657,7 @@ NO_INLINE size_t joinRightColumnsWithAddtitionalFilter(
|
||||
using FindResult = typename KeyGetter::FindResult;
|
||||
size_t max_joined_block_rows = added_columns.max_joined_block_rows;
|
||||
size_t left_row_iter = 0;
|
||||
std::vector<RowRef> selected_rows;
|
||||
PreSelectedRows selected_rows;
|
||||
selected_rows.reserve(left_block_rows);
|
||||
std::vector<FindResult> find_results;
|
||||
find_results.reserve(left_block_rows);
|
||||
@ -1709,9 +1700,9 @@ NO_INLINE size_t joinRightColumnsWithAddtitionalFilter(
|
||||
auto & mapped = find_result.getMapped();
|
||||
find_results.push_back(find_result);
|
||||
if (flag_per_row)
|
||||
addFoundRowRefAll<true>(mapped, selected_rows, current_added_rows, all_flag_known_rows);
|
||||
addFoundRowAll<Map, false, true>(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr);
|
||||
else
|
||||
addFoundRowRefAll<false>(mapped, selected_rows, current_added_rows, single_flag_know_rows);
|
||||
addFoundRowAll<Map, false, false>(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr);
|
||||
}
|
||||
}
|
||||
row_replicate_offset.push_back(current_added_rows);
|
||||
@ -1720,17 +1711,7 @@ NO_INLINE size_t joinRightColumnsWithAddtitionalFilter(
|
||||
|
||||
auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col)
|
||||
{
|
||||
const PaddedPODArray<UInt8> * filter_flags = nullptr;
|
||||
filter_col = filter_col->convertToFullIfNeeded();
|
||||
if (filter_col->isNullable())
|
||||
{
|
||||
auto nested_col = typeid_cast<const ColumnNullable &>(*filter_col).getNestedColumnPtr();
|
||||
filter_flags = &(dynamic_cast<const ColumnUInt8 &>(*nested_col).getData());
|
||||
}
|
||||
else
|
||||
{
|
||||
filter_flags = &(dynamic_cast<const ColumnUInt8 &>(*filter_col).getData());
|
||||
}
|
||||
const PaddedPODArray<UInt8> & filter_flags = assert_cast<const ColumnUInt8 &>(*filter_col).getData();
|
||||
|
||||
size_t prev_replicated_row = 0;
|
||||
auto selected_right_row_it = selected_rows.begin();
|
||||
@ -1743,7 +1724,7 @@ NO_INLINE size_t joinRightColumnsWithAddtitionalFilter(
|
||||
{
|
||||
for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row)
|
||||
{
|
||||
if ((*filter_flags)[replicated_row])
|
||||
if (filter_flags[replicated_row])
|
||||
{
|
||||
any_matched = true;
|
||||
added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing);
|
||||
@ -1758,7 +1739,7 @@ NO_INLINE size_t joinRightColumnsWithAddtitionalFilter(
|
||||
{
|
||||
for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row)
|
||||
{
|
||||
if ((*filter_flags)[replicated_row])
|
||||
if (filter_flags[replicated_row])
|
||||
{
|
||||
any_matched = true;
|
||||
added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing);
|
||||
@ -1979,48 +1960,30 @@ size_t joinRightColumnsSwitchMultipleDisjuncts(
|
||||
AddedColumns & added_columns,
|
||||
JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]])
|
||||
{
|
||||
auto join_without_additional_filter = [&]()
|
||||
{
|
||||
return mapv.size() > 1 ? joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, need_filter, true>(
|
||||
std::forward<std::vector<KeyGetter>>(key_getter_vector), mapv, added_columns, used_flags)
|
||||
: joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, need_filter, false>(
|
||||
std::forward<std::vector<KeyGetter>>(key_getter_vector), mapv, added_columns, used_flags);
|
||||
};
|
||||
|
||||
constexpr JoinFeatures<KIND, STRICTNESS> join_features;
|
||||
if constexpr (join_features.is_all_join)
|
||||
{
|
||||
if (added_columns.additional_filter_expression)
|
||||
{
|
||||
constexpr bool mark_per_row_used = join_features.right || join_features.full;
|
||||
return mapv.size() > 1 ? joinRightColumnsWithAddtitionalFilter<KeyGetter, Map, join_features.need_replication>(
|
||||
std::forward<std::vector<KeyGetter>>(key_getter_vector),
|
||||
mapv,
|
||||
added_columns,
|
||||
used_flags,
|
||||
need_filter,
|
||||
join_features.need_flags,
|
||||
join_features.add_missing,
|
||||
true)
|
||||
: joinRightColumnsWithAddtitionalFilter<KeyGetter, Map, join_features.need_replication>(
|
||||
std::forward<std::vector<KeyGetter>>(key_getter_vector),
|
||||
mapv,
|
||||
added_columns,
|
||||
used_flags,
|
||||
need_filter,
|
||||
join_features.need_flags,
|
||||
join_features.add_missing,
|
||||
mark_per_row_used);
|
||||
}
|
||||
else
|
||||
{
|
||||
return join_without_additional_filter();
|
||||
bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1;
|
||||
return joinRightColumnsWithAddtitionalFilter<KeyGetter, Map, join_features.need_replication>(
|
||||
std::forward<std::vector<KeyGetter>>(key_getter_vector),
|
||||
mapv,
|
||||
added_columns,
|
||||
used_flags,
|
||||
need_filter,
|
||||
join_features.need_flags,
|
||||
join_features.add_missing,
|
||||
mark_per_row_used);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return join_without_additional_filter();
|
||||
}
|
||||
|
||||
if (added_columns.additional_filter_expression)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN");
|
||||
|
||||
return mapv.size() > 1
|
||||
? joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, need_filter, true>(std::forward<std::vector<KeyGetter>>(key_getter_vector), mapv, added_columns, used_flags)
|
||||
: joinRightColumns<KIND, STRICTNESS, KeyGetter, Map, need_filter, false>(std::forward<std::vector<KeyGetter>>(key_getter_vector), mapv, added_columns, used_flags);
|
||||
}
|
||||
|
||||
template <JoinKind KIND, JoinStrictness STRICTNESS, typename KeyGetter, typename Map, typename AddedColumns>
|
||||
@ -2796,6 +2759,7 @@ void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr additiona
|
||||
{
|
||||
if (!additional_filter_expression)
|
||||
return;
|
||||
|
||||
Block expression_sample_block = additional_filter_expression->getSampleBlock();
|
||||
|
||||
if (expression_sample_block.columns() != 1)
|
||||
@ -2818,7 +2782,7 @@ void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr additiona
|
||||
if (!is_supported)
|
||||
{
|
||||
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION,
|
||||
"Non equi condition '{}' from JOIN ON section is supported only for ALL INNER/LEFT/FULL/RIGHT JOINs.",
|
||||
"Non equi condition '{}' from JOIN ON section is supported only for ALL INNER/LEFT/FULL/RIGHT JOINs",
|
||||
expression_sample_block.getByPosition(0).name);
|
||||
}
|
||||
}
|
||||
|
@ -459,7 +459,7 @@ JoinClausesAndActions buildJoinClausesAndActions(
|
||||
join_right_table_expressions,
|
||||
join_node,
|
||||
result.join_clauses.back());
|
||||
is_inequal_join |= result.join_clauses.back().hasMixedFilterCondition();
|
||||
is_inequal_join |= !result.join_clauses.back().getMixedFilterConditionNodes().empty();
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -476,7 +476,7 @@ JoinClausesAndActions buildJoinClausesAndActions(
|
||||
join_right_table_expressions,
|
||||
join_node,
|
||||
result.join_clauses.back());
|
||||
is_inequal_join |= result.join_clauses.back().hasMixedFilterCondition();
|
||||
is_inequal_join |= !result.join_clauses.back().getMixedFilterConditionNodes().empty();
|
||||
}
|
||||
|
||||
auto and_function = FunctionFactory::instance().get("and", planner_context->getQueryContext());
|
||||
@ -595,9 +595,10 @@ JoinClausesAndActions buildJoinClausesAndActions(
|
||||
result.right_join_tmp_expression_actions = std::move(right_join_actions);
|
||||
result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names);
|
||||
|
||||
/// If there is any inequal join condition, we need to build full join expressions actions.
|
||||
if (is_inequal_join)
|
||||
{
|
||||
/// In case of multiple disjuncts and any inequality join condition, we need to build the actions for the full JOIN ON expression.
|
||||
/// So, for each column, we recalculate the value of the whole expression from JOIN ON to check if rows should be joined.
|
||||
if (result.join_clauses.size() > 1)
|
||||
{
|
||||
auto mixed_join_expressions_actions = std::make_shared<ActionsDAG>(mixed_table_expression_columns);
|
||||
@ -622,7 +623,7 @@ JoinClausesAndActions buildJoinClausesAndActions(
|
||||
auto outputs = result.mixed_join_expressions_actions->getOutputs();
|
||||
if (outputs.size() != 1)
|
||||
{
|
||||
throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Only one output is expected. but got:\n{}", result.mixed_join_expressions_actions->dumpDAG());
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Only one output is expected, got: {}", result.mixed_join_expressions_actions->dumpDAG());
|
||||
}
|
||||
auto output_type = removeNullable(outputs[0]->result_type);
|
||||
WhichDataType which_type(output_type);
|
||||
@ -846,11 +847,12 @@ std::shared_ptr<IJoin> chooseJoinAlgorithm(std::shared_ptr<TableJoin> & table_jo
|
||||
const Block & right_table_expression_header,
|
||||
const PlannerContextPtr & planner_context)
|
||||
{
|
||||
if (table_join->getMixedJoinExpression() && !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH)
|
||||
if (table_join->getMixedJoinExpression()
|
||||
&& !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH)
|
||||
&& !table_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH))
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
|
||||
"JOIN with mixed conditions supports only hash join or grace hash join with one disjunct.");
|
||||
"JOIN with mixed conditions supports only hash join or grace hash join");
|
||||
}
|
||||
|
||||
trySetStorageInTableJoin(right_table_expression, table_join);
|
||||
|
@ -155,10 +155,6 @@ public:
|
||||
return mixed_filter_condition_nodes;
|
||||
}
|
||||
|
||||
bool hasMixedFilterCondition() const
|
||||
{
|
||||
return !mixed_filter_condition_nodes.empty();
|
||||
}
|
||||
/// Dump clause into buffer
|
||||
void dump(WriteBuffer & buffer) const;
|
||||
|
||||
|
@ -7,7 +7,7 @@ CREATE TABLE t2 (key String, attr String, a UInt64, b UInt64, c Nullable(UInt64)
|
||||
INSERT INTO t2 VALUES ('key1', 'A', 1, 2, 1), ('key1', 'B', 2, 1, 2), ('key1', 'C', 3, 4, 5), ('key1', 'D', 4, 1, 6), ('key3', 'a3', 1, 1, 1), ('key4', 'F', 1,1,1);
|
||||
|
||||
SET allow_experimental_analyzer=1;
|
||||
SET allow_mixed_join_condition=1;
|
||||
SET allow_experimental_join_condition=1;
|
||||
SET join_use_nulls=0;
|
||||
-- { echoOn }
|
||||
{% for algorithm in ['hash', 'grace_hash'] -%}
|
||||
|
Loading…
Reference in New Issue
Block a user