diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 95d0489d694..4ef407a4d13 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -165,6 +165,68 @@ Result: └───┴────┴─────┘ ``` +## [experimental] Join with inequality conditions + +:::note +This feature is experimental. To use it, set `allow_experimental_join_condition` to 1 in your configuration files or by using the `SET` command: + +```sql +SET allow_experimental_join_condition=1 +``` + +Otherwise, you'll get `INVALID_JOIN_ON_EXPRESSION`. + +::: + +ClickHouse currently supports `ALL INNER/LEFT/RIGHT/FULL JOIN` with inequality conditions in addition to equality conditions. The inequality conditions are supported only for `hash` and `grace_hash` join algorithms. The inequality conditions are not supported with `join_use_nulls`. + +**Example** + +Table `t1`: + +``` +┌─key──┬─attr─┬─a─┬─b─┬─c─┐ +│ key1 │ a │ 1 │ 1 │ 2 │ +│ key1 │ b │ 2 │ 3 │ 2 │ +│ key1 │ c │ 3 │ 2 │ 1 │ +│ key1 │ d │ 4 │ 7 │ 2 │ +│ key1 │ e │ 5 │ 5 │ 5 │ +│ key2 │ a2 │ 1 │ 1 │ 1 │ +│ key4 │ f │ 2 │ 3 │ 4 │ +└──────┴──────┴───┴───┴───┘ +``` + +Table `t2`: + +``` +┌─key──┬─attr─┬─a─┬─b─┬─c─┐ +│ key1 │ A │ 1 │ 2 │ 1 │ +│ key1 │ B │ 2 │ 1 │ 2 │ +│ key1 │ C │ 3 │ 4 │ 5 │ +│ key1 │ D │ 4 │ 1 │ 6 │ +│ key3 │ a3 │ 1 │ 1 │ 1 │ +│ key4 │ F │ 1 │ 1 │ 1 │ +└──────┴──────┴───┴───┴───┘ +``` + +```sql +SELECT t1.*, t2.* from t1 LEFT JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +``` + +``` +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 0 0 \N +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +``` + + ## NULL values in JOIN keys The NULL is not equal to any value, including itself. 
It means that if a JOIN key has a NULL value in one table, it won't match a NULL value in the other table. @@ -273,7 +335,7 @@ For example, consider the following tables: ## PASTE JOIN Usage The result of `PASTE JOIN` is a table that contains all columns from left subquery followed by all columns from the right subquery. -The rows are matched based on their positions in the original tables (the order of rows should be defined). +The rows are matched based on their positions in the original tables (the order of rows should be defined). If the subqueries return a different number of rows, extra rows will be cut. Example: diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2cb3e598811..63d879d2239 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -322,6 +322,7 @@ class IColumn; M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ + M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", IMPORTANT) \ \ M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \ M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. 
ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index a575acba6e6..4df60330c93 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -96,6 +96,7 @@ static std::map sett {"temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds", (10 * 60 * 1000), (10 * 60 * 1000), "Wait time to lock cache for sapce reservation in temporary data in filesystem cache"}, {"optimize_rewrite_sum_if_to_count_if", false, true, "Only available for the analyzer, where it works correctly"}, {"azure_allow_parallel_part_upload", "true", "true", "Use multiple threads for azure multipart upload."}, + {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, {"max_recursive_cte_evaluation_depth", DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, DBMS_RECURSIVE_CTE_MAX_EVALUATION_DEPTH, "Maximum limit on recursive CTE evaluation depth"}, {"query_plan_convert_outer_join_to_inner_join", false, true, "Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values"}, }}, diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 1d193b3499c..04f29f35c3c 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -615,12 +615,16 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon res_column.column = action.node->function->execute(arguments, res_column.type, num_rows, dry_run); if (res_column.column->getDataType() != res_column.type->getColumnType()) + { throw Exception( ErrorCodes::LOGICAL_ERROR, - "Unexpected return type from {}. Expected {}. Got {}", + "Unexpected return type from {}. Expected {}. Got {}. 
Action:\n{},\ninput block structure:{}", action.node->function->getName(), - res_column.type->getColumnType(), - res_column.column->getDataType()); + res_column.type->getName(), + res_column.column->getName(), + action.toString(), + Block(arguments).dumpStructure()); + } } break; } diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index a555505417e..0c759d381d7 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -16,14 +16,17 @@ #include +#include #include #include +#include #include #include #include #include #include +#include #include @@ -50,6 +53,7 @@ namespace ErrorCodes extern const int SET_SIZE_LIMIT_EXCEEDED; extern const int TYPE_MISMATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int INVALID_JOIN_ON_EXPRESSION; } namespace @@ -119,14 +123,14 @@ namespace JoinStuff } } - template + template void JoinUsedFlags::setUsed(const FindResult & f) { if constexpr (!use_flags) return; /// Could be set simultaneously from different threads. - if constexpr (multiple_disjuncts) + if constexpr (flag_per_row) { auto & mapped = f.getMapped(); flags[mapped.block][mapped.row_num].store(true, std::memory_order_relaxed); @@ -137,14 +141,14 @@ namespace JoinStuff } } - template + template void JoinUsedFlags::setUsed(const Block * block, size_t row_num, size_t offset) { if constexpr (!use_flags) return; /// Could be set simultaneously from different threads. 
- if constexpr (multiple_disjuncts) + if constexpr (flag_per_row) { flags[block][row_num].store(true, std::memory_order_relaxed); } @@ -154,13 +158,13 @@ namespace JoinStuff } } - template + template bool JoinUsedFlags::getUsed(const FindResult & f) { if constexpr (!use_flags) return true; - if constexpr (multiple_disjuncts) + if constexpr (flag_per_row) { auto & mapped = f.getMapped(); return flags[mapped.block][mapped.row_num].load(); @@ -171,13 +175,13 @@ namespace JoinStuff } } - template + template bool JoinUsedFlags::setUsedOnce(const FindResult & f) { if constexpr (!use_flags) return true; - if constexpr (multiple_disjuncts) + if constexpr (flag_per_row) { auto & mapped = f.getMapped(); @@ -253,6 +257,8 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s LOG_TRACE(log, "{}Keys: {}, datatype: {}, kind: {}, strictness: {}, right header: {}", instance_log_id, TableJoin::formatClauses(table_join->getClauses(), true), data->type, kind, strictness, right_sample_block.dumpStructure()); + validateAdditionalFilterExpression(table_join->getMixedJoinExpression()); + if (isCrossOrComma(kind)) { data->type = Type::CROSS; @@ -705,7 +711,8 @@ void HashJoin::initRightBlockStructure(Block & saved_block_sample) bool save_key_columns = table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO) || table_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH) || isRightOrFull(kind) || - multiple_disjuncts; + multiple_disjuncts || + table_join->getMixedJoinExpression(); if (save_key_columns) { saved_block_sample = right_table_keys.cloneEmpty(); @@ -835,7 +842,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) if (rows) data->empty = false; - bool multiple_disjuncts = !table_join->oneDisjunct(); + bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); const auto & onexprs = table_join->getClauses(); for (size_t onexpr_idx = 0; onexpr_idx < onexprs.size(); ++onexpr_idx) { @@ -859,7 +866,7 @@ bool HashJoin::addBlockToJoin(const 
Block & source_block_, bool check_limits) auto join_mask_col = JoinCommon::getColumnAsMask(source_block, onexprs[onexpr_idx].condColumnNames().second); /// Save blocks that do not hold conditions in ON section ColumnUInt8::MutablePtr not_joined_map = nullptr; - if (!multiple_disjuncts && isRightOrFull(kind) && join_mask_col.hasData()) + if (!flag_per_row && isRightOrFull(kind) && join_mask_col.hasData()) { const auto & join_mask = join_mask_col.getData(); /// Save rows that do not hold conditions @@ -889,7 +896,7 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) join_mask_col.getData(), data->pool, is_inserted); - if (multiple_disjuncts) + if (flag_per_row) used_flags.reinit(stored_block); else if (is_inserted) /// Number of buckets + 1 value from zero storage @@ -897,19 +904,19 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) }); } - if (!multiple_disjuncts && save_nullmap && is_inserted) + if (!flag_per_row && save_nullmap && is_inserted) { data->blocks_nullmaps_allocated_size += null_map_holder->allocatedBytes(); data->blocks_nullmaps.emplace_back(stored_block, null_map_holder); } - if (!multiple_disjuncts && not_joined_map && is_inserted) + if (!flag_per_row && not_joined_map && is_inserted) { data->blocks_nullmaps_allocated_size += not_joined_map->allocatedBytes(); data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map)); } - if (!multiple_disjuncts && !is_inserted) + if (!flag_per_row && !is_inserted) { LOG_TRACE(log, "Skipping inserting block with {} rows", rows); data->blocks_allocated_size -= stored_block->allocatedBytes(); @@ -1044,14 +1051,17 @@ public: }; AddedColumns( - const Block & left_block, + const Block & left_block_, const Block & block_with_columns_to_add, const Block & saved_block_sample, const HashJoin & join, std::vector && join_on_keys_, + ExpressionActionsPtr additional_filter_expression_, bool is_asof_join, bool is_join_get_) - : join_on_keys(join_on_keys_) 
+ : left_block(left_block_) + , join_on_keys(join_on_keys_) + , additional_filter_expression(additional_filter_expression_) , rows_to_add(left_block.rows()) , is_join_get(is_join_get_) { @@ -1120,7 +1130,9 @@ public: const IColumn & leftAsofKey() const { return *left_asof_key; } + Block left_block; std::vector join_on_keys; + ExpressionActionsPtr additional_filter_expression; size_t max_joined_block_rows = 0; size_t rows_to_add; @@ -1221,7 +1233,7 @@ void AddedColumns::buildOutput() { if (!lazy_output.blocks[j]) { - default_count ++; + default_count++; continue; } apply_default(); @@ -1340,7 +1352,7 @@ struct JoinFeatures static constexpr bool need_flags = MapGetter::flagged; }; -template +template class KnownRowsHolder; /// Keep already joined rows to prevent duplication if many disjuncts @@ -1415,18 +1427,18 @@ public: } }; -template +template void addFoundRowAll( const typename Map::mapped_type & mapped, AddedColumns & added, IColumn::Offset & current_offset, - KnownRowsHolder & known_rows [[maybe_unused]], + KnownRowsHolder & known_rows [[maybe_unused]], JoinStuff::JoinUsedFlags * used_flags [[maybe_unused]]) { if constexpr (add_missing) added.applyLazyDefaults(); - if constexpr (multiple_disjuncts) + if constexpr (flag_per_row) { std::unique_ptr::Type>> new_known_rows_ptr; @@ -1443,7 +1455,7 @@ void addFoundRowAll( new_known_rows_ptr->push_back(std::make_pair(it->block, it->row_num)); if (used_flags) { - used_flags->JoinStuff::JoinUsedFlags::setUsedOnce( + used_flags->JoinStuff::JoinUsedFlags::setUsedOnce( FindResultImpl(*it, true, 0)); } } @@ -1482,9 +1494,324 @@ void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unuse filter[pos] = 1; } +template +ColumnPtr buildAdditionalFilter( + size_t left_start_row, + const std::vector & selected_rows, + const std::vector & row_replicate_offset, + AddedColumns & added_columns) +{ + ColumnPtr result_column; + do + { + if (selected_rows.empty()) + { + result_column = ColumnUInt8::create(); + break; 
+ } + const Block & sample_right_block = *selected_rows.begin()->block; + if (!sample_right_block || !added_columns.additional_filter_expression) + { + auto filter = ColumnUInt8::create(); + filter->insertMany(1, selected_rows.size()); + result_column = std::move(filter); + break; + } + + auto required_cols = added_columns.additional_filter_expression->getRequiredColumnsWithTypes(); + if (required_cols.empty()) + { + Block block; + added_columns.additional_filter_expression->execute(block); + result_column = block.getByPosition(0).column->cloneResized(selected_rows.size()); + break; + } + NameSet required_column_names; + for (auto & col : required_cols) + required_column_names.insert(col.name); + + Block executed_block; + size_t right_col_pos = 0; + for (const auto & col : sample_right_block.getColumnsWithTypeAndName()) + { + if (required_column_names.contains(col.name)) + { + auto new_col = col.column->cloneEmpty(); + for (const auto & selected_row : selected_rows) + { + const auto & src_col = selected_row.block->getByPosition(right_col_pos); + new_col->insertFrom(*src_col.column, selected_row.row_num); + } + executed_block.insert({std::move(new_col), col.type, col.name}); + } + right_col_pos += 1; + } + if (!executed_block) + { + result_column = ColumnUInt8::create(); + break; + } + + for (const auto & col_name : required_column_names) + { + const auto * src_col = added_columns.left_block.findByName(col_name); + if (!src_col) + continue; + auto new_col = src_col->column->cloneEmpty(); + size_t prev_left_offset = 0; + for (size_t i = 1; i < row_replicate_offset.size(); ++i) + { + const size_t & left_offset = row_replicate_offset[i]; + size_t rows = left_offset - prev_left_offset; + if (rows) + new_col->insertManyFrom(*src_col->column, left_start_row + i - 1, rows); + prev_left_offset = left_offset; + } + executed_block.insert({std::move(new_col), src_col->type, col_name}); + } + if (!executed_block) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "required 
columns: [{}], but not found any in left/right table. right table: {}, left table: {}", + required_cols.toString(), + sample_right_block.dumpNames(), + added_columns.left_block.dumpNames()); + } + + for (const auto & col : executed_block.getColumnsWithTypeAndName()) + if (!col.column || !col.type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal nullptr column in input block: {}", executed_block.dumpStructure()); + + added_columns.additional_filter_expression->execute(executed_block); + result_column = executed_block.getByPosition(0).column->convertToFullColumnIfConst(); + executed_block.clear(); + } while (false); + + result_column = result_column->convertToFullIfNeeded(); + if (result_column->isNullable()) + { + /// Convert Nullable(UInt8) to UInt8 ensuring that nulls are zeros + /// Trying to avoid copying data, since we are the only owner of the column. + ColumnPtr mask_column = assert_cast(*result_column).getNullMapColumnPtr(); + + MutableColumnPtr mutable_column; + { + ColumnPtr nested_column = assert_cast(*result_column).getNestedColumnPtr(); + result_column.reset(); + mutable_column = IColumn::mutate(std::move(nested_column)); + } + + auto & column_data = assert_cast(*mutable_column).getData(); + const auto & mask_column_data = assert_cast(*mask_column).getData(); + for (size_t i = 0; i < column_data.size(); ++i) + { + if (mask_column_data[i]) + column_data[i] = 0; + } + return mutable_column; + } + return result_column; +} + +/// Adapter class to pass into addFoundRowAll +/// In joinRightColumnsWithAdditionalFilter we don't want to add rows directly into AddedColumns, +/// because they need to be filtered by additional_filter_expression. +class PreSelectedRows : public std::vector +{ +public: + void appendFromBlock(const Block & block, size_t row_num, bool /* has_default */) { this->emplace_back(&block, row_num); } +}; + +/// First collect refs to all rows matched by the join keys, then filter out rows for which the additional filter expression is not true. 
+template < + typename KeyGetter, + typename Map, + bool need_replication, + typename AddedColumns> +NO_INLINE size_t joinRightColumnsWithAddtitionalFilter( + std::vector && key_getter_vector, + const std::vector & mapv, + AddedColumns & added_columns, + JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]], + bool need_filter [[maybe_unused]], + bool need_flags [[maybe_unused]], + bool add_missing [[maybe_unused]], + bool flag_per_row [[maybe_unused]]) +{ + size_t left_block_rows = added_columns.rows_to_add; + if (need_filter) + added_columns.filter = IColumn::Filter(left_block_rows, 0); + + std::unique_ptr pool; + + if constexpr (need_replication) + added_columns.offsets_to_replicate = std::make_unique(left_block_rows); + + std::vector row_replicate_offset; + row_replicate_offset.reserve(left_block_rows); + + using FindResult = typename KeyGetter::FindResult; + size_t max_joined_block_rows = added_columns.max_joined_block_rows; + size_t left_row_iter = 0; + PreSelectedRows selected_rows; + selected_rows.reserve(left_block_rows); + std::vector find_results; + find_results.reserve(left_block_rows); + bool exceeded_max_block_rows = false; + IColumn::Offset total_added_rows = 0; + IColumn::Offset current_added_rows = 0; + + auto collect_keys_matched_rows_refs = [&]() + { + pool = std::make_unique(); + find_results.clear(); + row_replicate_offset.clear(); + row_replicate_offset.push_back(0); + current_added_rows = 0; + selected_rows.clear(); + for (; left_row_iter < left_block_rows; ++left_row_iter) + { + if constexpr (need_replication) + { + if (unlikely(total_added_rows + current_added_rows >= max_joined_block_rows)) + { + break; + } + } + KnownRowsHolder all_flag_known_rows; + KnownRowsHolder single_flag_know_rows; + for (size_t join_clause_idx = 0; join_clause_idx < added_columns.join_on_keys.size(); ++join_clause_idx) + { + const auto & join_keys = added_columns.join_on_keys[join_clause_idx]; + if (join_keys.null_map && (*join_keys.null_map)[left_row_iter]) + 
continue; + + bool row_acceptable = !join_keys.isRowFiltered(left_row_iter); + auto find_result = row_acceptable + ? key_getter_vector[join_clause_idx].findKey(*(mapv[join_clause_idx]), left_row_iter, *pool) + : FindResult(); + + if (find_result.isFound()) + { + auto & mapped = find_result.getMapped(); + find_results.push_back(find_result); + if (flag_per_row) + addFoundRowAll(mapped, selected_rows, current_added_rows, all_flag_known_rows, nullptr); + else + addFoundRowAll(mapped, selected_rows, current_added_rows, single_flag_know_rows, nullptr); + } + } + row_replicate_offset.push_back(current_added_rows); + } + }; + + auto copy_final_matched_rows = [&](size_t left_start_row, ColumnPtr filter_col) + { + const PaddedPODArray & filter_flags = assert_cast(*filter_col).getData(); + + size_t prev_replicated_row = 0; + auto selected_right_row_it = selected_rows.begin(); + size_t find_result_index = 0; + for (size_t i = 1, n = row_replicate_offset.size(); i < n; ++i) + { + bool any_matched = false; + /// For ALL RIGHT JOIN, flag_per_row is true, so we need to mark used flags for each row. 
+ if (flag_per_row) + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); + total_added_rows += 1; + if (need_flags) + used_flags.template setUsed(selected_right_row_it->block, selected_right_row_it->row_num, 0); + } + ++selected_right_row_it; + } + } + else + { + for (size_t replicated_row = prev_replicated_row; replicated_row < row_replicate_offset[i]; ++replicated_row) + { + if (filter_flags[replicated_row]) + { + any_matched = true; + added_columns.appendFromBlock(*selected_right_row_it->block, selected_right_row_it->row_num, add_missing); + total_added_rows += 1; + } + ++selected_right_row_it; + } + } + if (!any_matched) + { + if (add_missing) + addNotFoundRow(added_columns, total_added_rows); + else + addNotFoundRow(added_columns, total_added_rows); + } + else + { + if (!flag_per_row && need_flags) + used_flags.template setUsed(find_results[find_result_index]); + if (need_filter) + setUsed(added_columns.filter, left_start_row + i - 1); + if (add_missing) + added_columns.applyLazyDefaults(); + } + find_result_index += (prev_replicated_row != row_replicate_offset[i]); + + if constexpr (need_replication) + { + (*added_columns.offsets_to_replicate)[left_start_row + i - 1] = total_added_rows; + } + prev_replicated_row = row_replicate_offset[i]; + } + }; + + while (left_row_iter < left_block_rows && !exceeded_max_block_rows) + { + auto left_start_row = left_row_iter; + collect_keys_matched_rows_refs(); + if (selected_rows.size() != current_added_rows || row_replicate_offset.size() != left_row_iter - left_start_row + 1) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Sizes are mismatched. 
selected_rows.size:{}, current_added_rows:{}, row_replicate_offset.size:{}, left_row_iter: {}, " + "left_start_row: {}", + selected_rows.size(), + current_added_rows, + row_replicate_offset.size(), + left_row_iter, + left_start_row); + } + auto filter_col = buildAdditionalFilter(left_start_row, selected_rows, row_replicate_offset, added_columns); + copy_final_matched_rows(left_start_row, filter_col); + + if constexpr (need_replication) + { + // Add a check for current_added_rows to avoid running the filter expression on batches that are too small. + if (total_added_rows >= max_joined_block_rows || current_added_rows < 1024) + { + exceeded_max_block_rows = true; + } + } + } + + if constexpr (need_replication) + { + added_columns.offsets_to_replicate->resize_assume_reserved(left_row_iter); + added_columns.filter.resize_assume_reserved(left_row_iter); + } + added_columns.applyLazyDefaults(); + return left_row_iter; +} + /// Joins right table columns which indexes are present in right_indexes using specified map. /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
-template +template NO_INLINE size_t joinRightColumns( std::vector && key_getter_vector, const std::vector & mapv, @@ -1519,7 +1846,7 @@ NO_INLINE size_t joinRightColumns( bool right_row_found = false; - KnownRowsHolder known_rows; + KnownRowsHolder known_rows; for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) { const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; @@ -1542,10 +1869,10 @@ NO_INLINE size_t joinRightColumns( if (row_ref.block) { setUsed(added_columns.filter, i); - if constexpr (multiple_disjuncts) - used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); + if constexpr (flag_per_row) + used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); else - used_flags.template setUsed(find_result); + used_flags.template setUsed(find_result); added_columns.appendFromBlock(*row_ref.block, row_ref.row_num, join_features.add_missing); } @@ -1555,14 +1882,14 @@ NO_INLINE size_t joinRightColumns( else if constexpr (join_features.is_all_join) { setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); + used_flags.template setUsed(find_result); auto used_flags_opt = join_features.need_flags ? &used_flags : nullptr; addFoundRowAll(mapped, added_columns, current_offset, known_rows, used_flags_opt); } else if constexpr ((join_features.is_any_join || join_features.is_semi_join) && join_features.right) { /// Use first appeared left key + it needs left columns replication - bool used_once = used_flags.template setUsedOnce(find_result); + bool used_once = used_flags.template setUsedOnce(find_result); if (used_once) { auto used_flags_opt = join_features.need_flags ? 
&used_flags : nullptr; @@ -1572,7 +1899,7 @@ NO_INLINE size_t joinRightColumns( } else if constexpr (join_features.is_any_join && KIND == JoinKind::Inner) { - bool used_once = used_flags.template setUsedOnce(find_result); + bool used_once = used_flags.template setUsedOnce(find_result); /// Use first appeared left key only if (used_once) @@ -1590,12 +1917,12 @@ NO_INLINE size_t joinRightColumns( else if constexpr (join_features.is_anti_join) { if constexpr (join_features.right && join_features.need_flags) - used_flags.template setUsed(find_result); + used_flags.template setUsed(find_result); } else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) { setUsed(added_columns.filter, i); - used_flags.template setUsed(find_result); + used_flags.template setUsed(find_result); added_columns.appendFromBlock(*mapped.block, mapped.row_num, join_features.add_missing); if (join_features.is_any_or_semi_join) @@ -1630,6 +1957,27 @@ size_t joinRightColumnsSwitchMultipleDisjuncts( AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]]) { + constexpr JoinFeatures join_features; + if constexpr (join_features.is_all_join) + { + if (added_columns.additional_filter_expression) + { + bool mark_per_row_used = join_features.right || join_features.full || mapv.size() > 1; + return joinRightColumnsWithAddtitionalFilter( + std::forward>(key_getter_vector), + mapv, + added_columns, + used_flags, + need_filter, + join_features.need_flags, + join_features.add_missing, + mark_per_row_used); + } + } + + if (added_columns.additional_filter_expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Additional filter expression is not supported for this JOIN"); + return mapv.size() > 1 ? 
joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); @@ -1788,8 +2136,14 @@ Block HashJoin::joinBlockImpl( * For ASOF, the last column is used as the ASOF column */ AddedColumns added_columns( - block, block_with_columns_to_add, savedBlockSample(), *this, std::move(join_on_keys), join_features.is_asof_join, is_join_get); - + block, + block_with_columns_to_add, + savedBlockSample(), + *this, + std::move(join_on_keys), + table_join->getMixedJoinExpression(), + join_features.is_asof_join, + is_join_get); bool has_required_right_keys = (required_right_keys.columns() != 0); added_columns.need_filter = join_features.need_filter || has_required_right_keys; @@ -1856,11 +2210,15 @@ Block HashJoin::joinBlockImpl( /// If ALL ... JOIN - we replicate all the columns except the new ones. for (size_t i = 0; i < existing_columns; ++i) + { block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate); + } /// Replicate additional right keys for (size_t pos : right_keys_to_replicate) + { block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate); + } } return remaining_block; @@ -2108,10 +2466,10 @@ struct AdderNonJoined class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller { public: - NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_, bool multiple_disjuncts_) + NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_, bool flag_per_row_) : parent(parent_) , max_block_size(max_block_size_) - , multiple_disjuncts(multiple_disjuncts_) + , flag_per_row(flag_per_row_) , current_block_start(0) { if (parent.data == nullptr) @@ -2138,7 +2496,7 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown JOIN strictness '{}' (must be on of: ANY, ALL, ASOF)", parent.strictness); } - if (!multiple_disjuncts) + if (!flag_per_row) { 
fillNullsFromBlocks(columns_right, rows_added); } @@ -2149,7 +2507,7 @@ public: private: const HashJoin & parent; UInt64 max_block_size; - bool multiple_disjuncts; + bool flag_per_row; size_t current_block_start; @@ -2215,7 +2573,7 @@ private: { size_t rows_added = 0; - if (multiple_disjuncts) + if (flag_per_row) { if (!used_position.has_value()) used_position = parent.data->blocks.begin(); @@ -2307,8 +2665,8 @@ IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block, return {}; size_t left_columns_count = left_sample_block.columns(); - bool multiple_disjuncts = !table_join->oneDisjunct(); - if (!multiple_disjuncts) + bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); + if (!flag_per_row) { /// With multiple disjuncts, all keys are in sample_block_with_columns_to_add, so invariant is not held size_t expected_columns_count = left_columns_count + required_right_keys.columns() + sample_block_with_columns_to_add.columns(); @@ -2320,7 +2678,7 @@ IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block, } } - auto non_joined = std::make_unique(*this, max_block_size, multiple_disjuncts); + auto non_joined = std::make_unique(*this, max_block_size, flag_per_row); return std::make_unique(std::move(non_joined), result_sample_block, left_columns_count, *table_join); } @@ -2329,8 +2687,8 @@ void HashJoin::reuseJoinedData(const HashJoin & join) data = join.data; from_storage_join = true; - bool multiple_disjuncts = !table_join->oneDisjunct(); - if (multiple_disjuncts) + bool flag_per_row = needUsedFlagsForPerRightTableRow(table_join); + if (flag_per_row) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); for (auto & map : data->maps) @@ -2394,4 +2752,46 @@ const ColumnWithTypeAndName & HashJoin::rightAsofKeyColumn() const return savedBlockSample().getByName(table_join->getOnlyClause().key_names_right.back()); } +void HashJoin::validateAdditionalFilterExpression(ExpressionActionsPtr 
additional_filter_expression) +{ + if (!additional_filter_expression) + return; + + Block expression_sample_block = additional_filter_expression->getSampleBlock(); + + if (expression_sample_block.columns() != 1) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected expression in JOIN ON section. Expected single column, got '{}'", + expression_sample_block.dumpStructure()); + } + + auto type = removeNullable(expression_sample_block.getByPosition(0).type); + if (!type->equals(*std::make_shared())) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected expression in JOIN ON section. Expected boolean (UInt8), got '{}'. expression:\n{}", + expression_sample_block.getByPosition(0).type->getName(), + additional_filter_expression->dumpActions()); + } + + bool is_supported = (strictness == JoinStrictness::All) && (isInnerOrLeft(kind) || isRightOrFull(kind)); + if (!is_supported) + { + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "Non equi condition '{}' from JOIN ON section is supported only for ALL INNER/LEFT/FULL/RIGHT JOINs", + expression_sample_block.getByPosition(0).name); + } +} + +bool HashJoin::needUsedFlagsForPerRightTableRow(std::shared_ptr table_join_) const +{ + if (!table_join_->oneDisjunct()) + return true; + /// If it's an ALL RIGHT/FULL JOIN with inequality conditions, we need to mark each row + if (table_join_->getMixedJoinExpression() && isRightOrFull(table_join_->kind())) + return true; + return false; +} + } diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 3079c189300..b7f41a7eb6b 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -31,6 +31,7 @@ namespace DB { class TableJoin; +class ExpressionActions; namespace JoinStuff { @@ -60,16 +61,16 @@ public: bool getUsedSafe(size_t i) const; bool getUsedSafe(const Block * block_ptr, size_t row_idx) const; - template + template void setUsed(const T & f); - template + template void setUsed(const Block * block, size_t row_num, size_t 
offset); - template + template bool getUsed(const T & f); - template + template bool setUsedOnce(const T & f); }; @@ -485,6 +486,9 @@ private: static Type chooseMethod(JoinKind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes); bool empty() const; + + void validateAdditionalFilterExpression(std::shared_ptr additional_filter_expression); + bool needUsedFlagsForPerRightTableRow(std::shared_ptr table_join_) const; }; } diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 6f8773d64a7..31ac58578d5 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -28,6 +28,7 @@ class ASTSelectQuery; struct DatabaseAndTableWithAlias; class Block; class DictionaryJoinAdapter; +class ExpressionActions; class StorageJoin; class StorageDictionary; class IKeyValueEntity; @@ -153,6 +154,8 @@ private: ASTs key_asts_right; Clauses clauses; + /// Originally used for inequal join. If there is no any inequal join condition, it will be nullptr. + std::shared_ptr mixed_join_expression = nullptr; ASTTableJoin table_join; @@ -301,6 +304,9 @@ public: std::vector & getClauses() { return clauses; } const std::vector & getClauses() const { return clauses; } + const std::shared_ptr & getMixedJoinExpression() const { return mixed_join_expression; } + std::shared_ptr & getMixedJoinExpression() { return mixed_join_expression; } + Names getAllNames(JoinTableSide side) const; void resetCollected(); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index cbdcb30ad33..514c19b0f89 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -1305,6 +1305,14 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_ std::swap(table_join_clause.key_names_right.at(asof_condition.key_index), table_join_clause.key_names_right.back()); } } + + if (join_clauses_and_actions.mixed_join_expressions_actions) + { + ExpressionActionsPtr & mixed_join_expression = 
table_join->getMixedJoinExpression(); + mixed_join_expression = std::make_shared( + join_clauses_and_actions.mixed_join_expressions_actions, + ExpressionActionsSettings::fromContext(planner_context->getQueryContext())); + } } else if (join_node.isUsingJoinExpression()) { diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 78b0e966ca4..1fdf51f399f 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -125,13 +126,13 @@ TableExpressionSet extractTableExpressionsSet(const QueryTreeNodePtr & node) return res; } -std::optional extractJoinTableSideFromExpression( +std::set extractJoinTableSidesFromExpression(//const ActionsDAG::Node * expression_root_node, const IQueryTreeNode * expression_root_node, const TableExpressionSet & left_table_expressions, const TableExpressionSet & right_table_expressions, const JoinNode & join_node) { - std::optional table_side; + std::set table_sides; std::vector nodes_to_process; nodes_to_process.push_back(expression_root_node); @@ -169,15 +170,10 @@ std::optional extractJoinTableSideFromExpression( join_node.getRightTableExpression()->formatASTForErrorMessage()); auto input_table_side = is_column_from_left_expr ? 
JoinTableSide::Left : JoinTableSide::Right; - if (table_side && (*table_side) != input_table_side) - throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "JOIN {} join expression contains column from left and right table", - join_node.formatASTForErrorMessage()); - - table_side = input_table_side; + table_sides.insert(input_table_side); } - return table_side; + return table_sides; } const ActionsDAG::Node * appendExpression( @@ -199,6 +195,7 @@ const ActionsDAG::Node * appendExpression( void buildJoinClause( ActionsDAGPtr & left_dag, ActionsDAGPtr & right_dag, + ActionsDAGPtr & mixed_dag, const PlannerContextPtr & planner_context, const QueryTreeNodePtr & join_expression, const TableExpressionSet & left_table_expressions, @@ -219,6 +216,7 @@ void buildJoinClause( buildJoinClause( left_dag, right_dag, + mixed_dag, planner_context, child, left_table_expressions, @@ -238,38 +236,42 @@ void buildJoinClause( const auto left_child = function_node->getArguments().getNodes().at(0); const auto right_child = function_node->getArguments().getNodes().at(1); - auto left_expression_side_optional = extractJoinTableSideFromExpression(left_child.get(), + auto left_expression_sides = extractJoinTableSidesFromExpression(left_child.get(), left_table_expressions, right_table_expressions, join_node); - auto right_expression_side_optional = extractJoinTableSideFromExpression(right_child.get(), + auto right_expression_sides = extractJoinTableSidesFromExpression(right_child.get(), left_table_expressions, right_table_expressions, join_node); - if (!left_expression_side_optional && !right_expression_side_optional) + if (left_expression_sides.empty() && right_expression_sides.empty()) { throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, - "JOIN {} ON expression with constants is not supported", + "JOIN {} ON expression expected non-empty left and right table expressions", join_node.formatASTForErrorMessage()); } - else if (left_expression_side_optional && 
!right_expression_side_optional) + else if (left_expression_sides.size() == 1 && right_expression_sides.empty()) { - auto & dag = *left_expression_side_optional == JoinTableSide::Left ? left_dag : right_dag; + auto expression_side = *left_expression_sides.begin(); + auto & dag = expression_side == JoinTableSide::Left ? left_dag : right_dag; const auto * node = appendExpression(dag, join_expression, planner_context, join_node); - join_clause.addCondition(*left_expression_side_optional, node); + join_clause.addCondition(expression_side, node); + } - else if (!left_expression_side_optional && right_expression_side_optional) + else if (left_expression_sides.empty() && right_expression_sides.size() == 1) { - auto & dag = *right_expression_side_optional == JoinTableSide::Left ? left_dag : right_dag; + auto expression_side = *right_expression_sides.begin(); + auto & dag = expression_side == JoinTableSide::Left ? left_dag : right_dag; const auto * node = appendExpression(dag, join_expression, planner_context, join_node); - join_clause.addCondition(*right_expression_side_optional, node); + join_clause.addCondition(expression_side, node); + } - else + else if (left_expression_sides.size() == 1 && right_expression_sides.size() == 1) { - auto left_expression_side = *left_expression_side_optional; - auto right_expression_side = *right_expression_side_optional; + auto left_expression_side = *left_expression_sides.begin(); + auto right_expression_side = *right_expression_sides.begin(); if (left_expression_side != right_expression_side) { @@ -310,23 +312,62 @@ void buildJoinClause( join_clause.addCondition(left_expression_side, node); } } + else + { + auto support_mixed_join_condition = planner_context->getQueryContext()->getSettingsRef().allow_experimental_join_condition; + auto join_use_nulls = planner_context->getQueryContext()->getSettingsRef().join_use_nulls; + /// If join_use_nulls = true, the columns' nullability will be changed later which make this expression not right. 
+ if (support_mixed_join_condition && !join_use_nulls) + { + /// expression involves both tables. + /// `expr1(left.col1, right.col2) == expr2(left.col3, right.col4)` + const auto * node = appendExpression(mixed_dag, join_expression, planner_context, join_node); + join_clause.addMixedCondition(node); + } + else + { + throw Exception( + ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} join expression contains column from left and right table", + join_node.formatASTForErrorMessage()); + } + } - return; } - - auto expression_side_optional = extractJoinTableSideFromExpression( - join_expression.get(), - left_table_expressions, - right_table_expressions, - join_node); - - if (!expression_side_optional) - expression_side_optional = JoinTableSide::Right; - - auto expression_side = *expression_side_optional; - auto & dag = expression_side == JoinTableSide::Left ? left_dag : right_dag; - const auto * node = appendExpression(dag, join_expression, planner_context, join_node); - join_clause.addCondition(expression_side, node); + else + { + auto expression_sides = extractJoinTableSidesFromExpression(join_expression.get(), + left_table_expressions, + right_table_expressions, + join_node); + // expression_sides.empty() = true, the expression is constant + if (expression_sides.empty() || expression_sides.size() == 1) + { + auto expression_side = expression_sides.empty() ? JoinTableSide::Right : *expression_sides.begin(); + auto & dag = expression_side == JoinTableSide::Left ? 
left_dag : right_dag; + const auto * node = appendExpression(dag, join_expression, planner_context, join_node); + join_clause.addCondition(expression_side, node); + } + else + { + auto support_mixed_join_condition = planner_context->getQueryContext()->getSettingsRef().allow_experimental_join_condition; + auto join_use_nulls = planner_context->getQueryContext()->getSettingsRef().join_use_nulls; + /// If join_use_nulls = true, the columns' nullability will be changed later which make this expression not right. + if (support_mixed_join_condition && !join_use_nulls) + { + /// expression involves both tables. + const auto * node = appendExpression(mixed_dag, join_expression, planner_context, join_node); + join_clause.addMixedCondition(node); + } + else + { + throw Exception( + ErrorCodes::INVALID_JOIN_ON_EXPRESSION, + "JOIN {} join expression contains column from left and right table", + join_node.formatASTForErrorMessage()); + } + } + } } JoinClausesAndActions buildJoinClausesAndActions( @@ -337,6 +378,16 @@ JoinClausesAndActions buildJoinClausesAndActions( { ActionsDAGPtr left_join_actions = std::make_shared(left_table_expression_columns); ActionsDAGPtr right_join_actions = std::make_shared(right_table_expression_columns); + ColumnsWithTypeAndName mixed_table_expression_columns; + for (const auto & left_column : left_table_expression_columns) + { + mixed_table_expression_columns.push_back(left_column); + } + for (const auto & right_column : right_table_expression_columns) + { + mixed_table_expression_columns.push_back(right_column); + } + ActionsDAGPtr mixed_join_actions = std::make_shared(mixed_table_expression_columns); /** It is possible to have constant value in JOIN ON section, that we need to ignore during DAG construction. * If we do not ignore it, this function will be replaced by underlying constant. 
@@ -390,6 +441,7 @@ JoinClausesAndActions buildJoinClausesAndActions( JoinClausesAndActions result; + bool is_inequal_join = false; const auto & function_name = function_node->getFunction()->getName(); if (function_name == "or") { @@ -400,12 +452,14 @@ JoinClausesAndActions buildJoinClausesAndActions( buildJoinClause( left_join_actions, right_join_actions, + mixed_join_actions, planner_context, child, join_left_table_expressions, join_right_table_expressions, join_node, result.join_clauses.back()); + is_inequal_join |= !result.join_clauses.back().getMixedFilterConditionNodes().empty(); } } else @@ -415,12 +469,14 @@ JoinClausesAndActions buildJoinClausesAndActions( buildJoinClause( left_join_actions, right_join_actions, + mixed_join_actions, planner_context, join_expression, join_left_table_expressions, join_right_table_expressions, join_node, result.join_clauses.back()); + is_inequal_join |= !result.join_clauses.back().getMixedFilterConditionNodes().empty(); } auto and_function = FunctionFactory::instance().get("and", planner_context->getQueryContext()); @@ -441,7 +497,6 @@ JoinClausesAndActions buildJoinClausesAndActions( if (!left_filter_condition_nodes.empty()) { const ActionsDAG::Node * dag_filter_condition_node = nullptr; - if (left_filter_condition_nodes.size() > 1) dag_filter_condition_node = &left_join_actions->addFunction(and_function, left_filter_condition_nodes, {}); else @@ -540,6 +595,47 @@ JoinClausesAndActions buildJoinClausesAndActions( result.right_join_tmp_expression_actions = std::move(right_join_actions); result.right_join_expressions_actions->removeUnusedActions(join_right_actions_names); + if (is_inequal_join) + { + /// In case of multiple disjuncts and any inequal join condition, we need to build full join on expression actions. + /// So, for each column, we recalculate the value of the whole expression from JOIN ON to check if rows should be joined. 
+ if (result.join_clauses.size() > 1) + { + auto mixed_join_expressions_actions = std::make_shared(mixed_table_expression_columns); + PlannerActionsVisitor join_expression_visitor(planner_context); + auto join_expression_dag_node_raw_pointers = join_expression_visitor.visit(mixed_join_expressions_actions, join_expression); + if (join_expression_dag_node_raw_pointers.size() != 1) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "JOIN {} ON clause contains multiple expressions", join_node.formatASTForErrorMessage()); + + mixed_join_expressions_actions->addOrReplaceInOutputs(*join_expression_dag_node_raw_pointers[0]); + Names required_names{join_expression_dag_node_raw_pointers[0]->result_name}; + mixed_join_expressions_actions->removeUnusedActions(required_names); + result.mixed_join_expressions_actions = mixed_join_expressions_actions; + } + else + { + const auto & join_clause = result.join_clauses.front(); + const auto & mixed_filter_condition_nodes = join_clause.getMixedFilterConditionNodes(); + auto mixed_join_expressions_actions = ActionsDAG::buildFilterActionsDAG(mixed_filter_condition_nodes, {}, true); + result.mixed_join_expressions_actions = mixed_join_expressions_actions; + } + auto outputs = result.mixed_join_expressions_actions->getOutputs(); + if (outputs.size() != 1) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Only one output is expected, got: {}", result.mixed_join_expressions_actions->dumpDAG()); + } + auto output_type = removeNullable(outputs[0]->result_type); + WhichDataType which_type(output_type); + if (!which_type.isUInt8()) + { + DataTypePtr uint8_ty = std::make_shared(); + auto true_col = ColumnWithTypeAndName(uint8_ty->createColumnConst(1, 1), uint8_ty, "true"); + const auto * true_node = &result.mixed_join_expressions_actions->addColumn(true_col); + result.mixed_join_expressions_actions = ActionsDAG::buildFilterActionsDAG({outputs[0], true_node}); + } + } + return result; } @@ -751,6 +847,14 @@ std::shared_ptr 
chooseJoinAlgorithm(std::shared_ptr & table_jo const Block & right_table_expression_header, const PlannerContextPtr & planner_context) { + if (table_join->getMixedJoinExpression() + && !table_join->isEnabledAlgorithm(JoinAlgorithm::HASH) + && !table_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "JOIN with mixed conditions supports only hash join or grace hash join"); + } + trySetStorageInTableJoin(right_table_expression, table_join); auto & right_table_expression_data = planner_context->getTableExpressionDataOrThrow(right_table_expression); diff --git a/src/Planner/PlannerJoins.h b/src/Planner/PlannerJoins.h index 7bc65cfb544..8adf6edd7ea 100644 --- a/src/Planner/PlannerJoins.h +++ b/src/Planner/PlannerJoins.h @@ -140,6 +140,21 @@ public: return right_filter_condition_nodes; } + ActionsDAG::NodeRawConstPtrs & getMixedFilterConditionNodes() + { + return mixed_filter_condition_nodes; + } + + void addMixedCondition(const ActionsDAG::Node * condition_node) + { + mixed_filter_condition_nodes.push_back(condition_node); + } + + const ActionsDAG::NodeRawConstPtrs & getMixedFilterConditionNodes() const + { + return mixed_filter_condition_nodes; + } + /// Dump clause into buffer void dump(WriteBuffer & buffer) const; @@ -154,6 +169,8 @@ private: ActionsDAG::NodeRawConstPtrs left_filter_condition_nodes; ActionsDAG::NodeRawConstPtrs right_filter_condition_nodes; + /// conditions which involve both left and right tables + ActionsDAG::NodeRawConstPtrs mixed_filter_condition_nodes; std::unordered_set nullsafe_compare_key_indexes; }; @@ -171,6 +188,9 @@ struct JoinClausesAndActions ActionsDAGPtr left_join_expressions_actions; /// Right join expressions actions ActionsDAGPtr right_join_expressions_actions; + /// Originally used for inequal join. it's the total join expression. + /// If there is no inequal join conditions, it's null. 
+ ActionsDAGPtr mixed_join_expressions_actions; }; /** Calculate join clauses and actions for JOIN ON section. diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference new file mode 100644 index 00000000000..806596f8a63 --- /dev/null +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.reference @@ -0,0 +1,366 @@ +-- { echoOn } +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 LEFT JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 0 0 \N +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 
OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 INNER JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 INNER JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 INNER JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 INNER JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 RIGHT JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, 
t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 FULL JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 FULL JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 
1 1 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 0 0 \N +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 FULL JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 FULL JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SET join_algorithm='grace_hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 LEFT JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 0 0 \N +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 LEFT JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT 
* FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 LEFT JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 INNER JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 INNER JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 INNER JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 INNER JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key4 f 2 3 4 key4 F 1 1 1 +SELECT 
t1.*, t2.* from t1 RIGHT JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 D 4 1 6 +SELECT t1.*, t2.* from t1 RIGHT JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 RIGHT JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 key4 F 1 1 1 +SELECT t1.*, t2.* from t1 FULL JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 A 1 2 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 c 3 2 1 key1 B 2 1 2 +key1 c 3 2 1 key1 C 3 4 5 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 FULL JOIN t2 ON t1.key = t2.key and 
(t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); + 0 0 \N key1 A 1 2 1 + 0 0 \N key3 a3 1 1 1 + 0 0 \N key4 F 1 1 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 0 0 \N +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 0 0 \N +key4 f 2 3 4 0 0 \N +SELECT t1.*, t2.* from t1 FULL JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 FULL JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +1 1 1 1 1 1 +SET join_algorithm='hash'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 a 1 1 2 key3 a3 1 1 1 +key1 a 1 1 2 key4 F 1 1 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 key1 A 1 2 1 +key2 a2 1 1 1 key3 a3 1 1 1 +key2 a2 1 1 1 key4 F 1 1 1 +key4 f 2 3 4 key1 B 2 1 2 +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 a 1 1 2 key3 a3 1 1 1 +key1 a 1 1 2 key4 F 1 1 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key2 a2 1 1 1 key1 A 1 2 1 +key2 a2 1 1 1 key3 a3 1 1 1 +key2 a2 1 1 1 key4 F 1 1 1 +key4 f 2 3 4 key1 B 2 1 2 +SELECT t1.*, t2.* FROM t1 RIGHT JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, 
t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 a 1 1 2 key3 a3 1 1 1 +key1 a 1 1 2 key4 F 1 1 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key2 a2 1 1 1 key1 A 1 2 1 +key2 a2 1 1 1 key3 a3 1 1 1 +key2 a2 1 1 1 key4 F 1 1 1 +key4 f 2 3 4 key1 B 2 1 2 +SELECT t1.*, t2.* FROM t1 FULL JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +key1 a 1 1 2 key1 A 1 2 1 +key1 a 1 1 2 key1 B 2 1 2 +key1 a 1 1 2 key1 C 3 4 5 +key1 a 1 1 2 key1 D 4 1 6 +key1 a 1 1 2 key3 a3 1 1 1 +key1 a 1 1 2 key4 F 1 1 1 +key1 b 2 3 2 key1 B 2 1 2 +key1 b 2 3 2 key1 C 3 4 5 +key1 b 2 3 2 key1 D 4 1 6 +key1 c 3 2 1 key1 C 3 4 5 +key1 c 3 2 1 key1 D 4 1 6 +key1 d 4 7 2 key1 D 4 1 6 +key1 e 5 5 5 0 0 \N +key2 a2 1 1 1 key1 A 1 2 1 +key2 a2 1 1 1 key3 a3 1 1 1 +key2 a2 1 1 1 key4 F 1 1 1 +key4 f 2 3 4 key1 B 2 1 2 diff --git a/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 new file mode 100644 index 00000000000..d3aa74f5c38 --- /dev/null +++ b/tests/queries/0_stateless/03006_join_on_inequal_expression_fast.sql.j2 @@ -0,0 +1,52 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (key String, attr String, a UInt64, b UInt64, c Nullable(UInt64)) ENGINE = MergeTree ORDER BY key; +INSERT INTO t1 VALUES ('key1', 'a', 1, 1, 2), ('key1', 'b', 2, 3, 2), ('key1', 'c', 3, 2, 1), ('key1', 'd', 4, 7, 2), ('key1', 'e', 5, 5, 5), ('key2', 'a2', 1, 1, 1), ('key4', 'f', 2, 3, 4); +CREATE TABLE t2 (key String, attr String, a UInt64, b UInt64, c Nullable(UInt64)) ENGINE = MergeTree ORDER BY key; +INSERT INTO t2 VALUES ('key1', 'A', 1, 2, 1), ('key1', 'B', 2, 1, 2), ('key1', 'C', 3, 4, 5), ('key1', 'D', 4, 1, 6), ('key3', 'a3', 1, 1, 1), 
('key4', 'F', 1,1,1); + +SET allow_experimental_analyzer=1; +SET allow_experimental_join_condition=1; +SET join_use_nulls=0; +-- { echoOn } +{% for algorithm in ['hash', 'grace_hash'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +SELECT t1.*, t2.* from t1 {{ join_type }} JOIN t2 ON t1.key = t2.key and t1.c ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM (SELECT 1 AS a, 1 AS b, 1 AS c) AS t1 {{ join_type }} JOIN (SELECT 1 AS a, 1 AS b, 1 AS c) AS t2 ON t1.a = t2.a AND (t1.b > 0 OR t2.b > 0); +{% endfor -%} +{% endfor -%} + + +{% for algorithm in ['hash'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr); +{% endfor -%} +{% endfor -%} +-- { echoOff } + +-- test error messages +{% for algorithm in ['partial_merge', 'full_sorting_merge', 'parallel_hash', 'auto', 'direct'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON (t1.a < t2.a OR lower(t1.attr) == lower(t2.attr)) AND t1.key = t2.key ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError NOT_IMPLEMENTED } +SELECT t1.*, t2.* from t1 {{ join_type }} JOIN t2 ON t1.key = t2.key and (t1.b + t2.b == t1.c + t2.c) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); 
-- { serverError NOT_IMPLEMENTED } +SELECT t1.*, t2.* from t1 {{ join_type }} JOIN t2 ON t1.key = t2.key and (t1.a < t2.a) ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError NOT_IMPLEMENTED } +{% endfor -%} +{% endfor -%} + +{% for algorithm in ['grace_hash', 'partial_merge', 'full_sorting_merge', 'parallel_hash', 'auto', 'direct'] -%} +SET join_algorithm='{{ algorithm }}'; +{% for join_type in ['LEFT', 'INNER', 'RIGHT', 'FULL'] -%} +SELECT t1.*, t2.* FROM t1 {{ join_type }} JOIN t2 ON t1.key = t2.key AND t1.a < t2.a OR t1.a = t2.a ORDER BY (t1.key, t1.attr, t2.key, t2.attr); -- { serverError NOT_IMPLEMENTED } +{% endfor -%} +{% endfor -%} + + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2;