From 5ce65546e93cbb798e5ca0df708618be1728688a Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 2 Dec 2019 21:07:27 +0300 Subject: [PATCH] semi & anti join --- dbms/src/Interpreters/Join.cpp | 164 ++++++++---------- dbms/src/Interpreters/Join.h | 20 ++- dbms/src/Interpreters/joinDispatch.h | 18 +- dbms/src/Parsers/ASTTablesInSelectQuery.cpp | 6 + dbms/src/Parsers/ASTTablesInSelectQuery.h | 6 +- dbms/src/Parsers/ExpressionElementParsers.cpp | 5 +- .../src/Parsers/ParserTablesInSelectQuery.cpp | 14 +- dbms/src/Storages/StorageJoin.cpp | 6 +- ..._removing_unused_columns_from_subquery.sql | 2 +- .../0_stateless/00679_replace_asterisk.sql | 7 +- ...identical_result_after_merge_zookeeper.sql | 3 +- .../0_stateless/00859_distinct_with_join.sql | 4 +- ...00956_join_use_nulls_with_array_column.sql | 3 +- .../01009_insert_select_data_loss.sql | 2 +- .../01009_insert_select_nicelulu.sql | 3 +- .../0_stateless/01031_new_any_join.reference | 4 +- .../0_stateless/01031_new_any_join.sql | 2 +- .../01031_semi_anti_join.reference | 15 ++ .../0_stateless/01031_semi_anti_join.sql | 25 +++ 19 files changed, 191 insertions(+), 118 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01031_semi_anti_join.reference create mode 100644 dbms/tests/queries/0_stateless/01031_semi_anti_join.sql diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 2be7f58767a..d4f09415920 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -193,10 +193,10 @@ static const IColumn * extractAsofColumn(const ColumnRawPtrs & key_columns) return key_columns.back(); } -template +template static KeyGetter createKeyGetter(const ColumnRawPtrs & key_columns, const Sizes & key_sizes) { - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) + if constexpr (is_asof_join) { auto key_column_copy = key_columns; auto key_size_copy = key_sizes; @@ -360,28 +360,19 @@ void Join::setSampleBlock(const Block & block) namespace { /// Inserting an element 
into a hash table of the form `key -> reference to a string`, which will then be used by JOIN. - template + template struct Inserter { - static void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool); - }; - - template - struct Inserter - { - static ALWAYS_INLINE void insert(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + static ALWAYS_INLINE void insertOne(const Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, + Arena & pool) { auto emplace_result = key_getter.emplaceKey(map, i, pool); if (emplace_result.isInserted() || join.anyTakeLastRow()) new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i); } - }; - template - struct Inserter - { - static ALWAYS_INLINE void insert(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) + static ALWAYS_INLINE void insertAll(const Join &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) { auto emplace_result = key_getter.emplaceKey(map, i, pool); @@ -393,13 +384,9 @@ namespace emplace_result.getMapped().insert({stored_block, i}, pool); } } - }; - template - struct Inserter - { - static ALWAYS_INLINE void insert(Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, - const IColumn * asof_column) + static ALWAYS_INLINE void insertAsof(Join & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool, + const IColumn * asof_column) { auto emplace_result = key_getter.emplaceKey(map, i, pool); typename Map::mapped_type * time_series_map = &emplace_result.getMapped(); @@ -416,30 +403,27 @@ namespace Join & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, Arena & pool) { + constexpr bool mapped_one = std::is_same_v || + std::is_same_v; + constexpr bool 
is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof; + const IColumn * asof_column [[maybe_unused]] = nullptr; - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) + if constexpr (is_asof_join) asof_column = extractAsofColumn(key_columns); - auto key_getter = createKeyGetter(key_columns, key_sizes); + auto key_getter = createKeyGetter(key_columns, key_sizes); for (size_t i = 0; i < rows; ++i) { if (has_null_map && (*null_map)[i]) continue; - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) - { - constexpr bool mapped_one = std::is_same_v || - std::is_same_v; - if constexpr (mapped_one) - Inserter::insert(join, map, key_getter, stored_block, i, pool); - else - Inserter::insert(join, map, key_getter, stored_block, i, pool); - } - else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) - Inserter::insert(join, map, key_getter, stored_block, i, pool, asof_column); + if constexpr (is_asof_join) + Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, asof_column); + else if constexpr (mapped_one) + Inserter::insertOne(join, map, key_getter, stored_block, i, pool); else - Inserter::insert(join, map, key_getter, stored_block, i, pool); + Inserter::insertAll(join, map, key_getter, stored_block, i, pool); } } @@ -706,11 +690,14 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added constexpr bool is_any_join = STRICTNESS == ASTTableJoin::Strictness::Any; constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All; constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof; - constexpr bool left_or_full = static_in_v; + constexpr bool is_semi_join = STRICTNESS == ASTTableJoin::Strictness::Semi; + constexpr bool is_anti_join = STRICTNESS == ASTTableJoin::Strictness::Anti; + constexpr bool left = KIND == ASTTableJoin::Kind::Left; constexpr bool right = KIND == ASTTableJoin::Kind::Right; + constexpr bool full = KIND == ASTTableJoin::Kind::Full; - constexpr bool add_missing 
= left_or_full; - constexpr bool need_replication = is_all_join || (is_any_join && right); + constexpr bool add_missing = (left || full) && !is_semi_join; + constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right); size_t rows = added_columns.rows_to_add; IColumn::Filter filter(rows, 0); @@ -723,7 +710,7 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added if constexpr (is_asof_join) asof_column = extractAsofColumn(added_columns.key_columns); - auto key_getter = createKeyGetter(added_columns.key_columns, added_columns.key_sizes); + auto key_getter = createKeyGetter(added_columns.key_columns, added_columns.key_sizes); IColumn::Offset current_offset = 0; @@ -768,7 +755,7 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added mapped.setUsed(); addFoundRowAll(mapped, added_columns, current_offset); } - else if constexpr (is_any_join && right) + else if constexpr ((is_any_join || is_semi_join) && right) { /// Use first appered left key + it needs left columns replication if (mapped.setUsedOnce()) @@ -786,11 +773,16 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added added_columns.appendFromBlock(*mapped.block, mapped.row_num); } } - else if constexpr (is_any_join && KIND == ASTTableJoin::Kind::Full) + else if constexpr (is_any_join && full) { /// TODO } - else /// ANY LEFT + old ANY (RightAny) + else if constexpr (is_anti_join) + { + if constexpr (right) + mapped.setUsed(); + } + else /// ANY LEFT, SEMI LEFT, old ANY (RightAny) { filter[i] = 1; mapped.setUsed(); @@ -798,7 +790,11 @@ NO_INLINE IColumn::Filter joinRightColumns(const Map & map, AddedColumns & added } } else + { + if constexpr (is_anti_join && left) + filter[i] = 1; addNotFoundRow(added_columns, current_offset); + } if constexpr (need_replication) (*added_columns.offsets_to_replicate)[i] = current_offset; @@ -849,12 +845,16 @@ void Join::joinBlockImpl( constexpr bool 
is_any_join = STRICTNESS == ASTTableJoin::Strictness::Any; constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All; constexpr bool is_asof_join = STRICTNESS == ASTTableJoin::Strictness::Asof; - constexpr bool right = KIND == ASTTableJoin::Kind::Right; - constexpr bool inner_or_right = static_in_v; - constexpr bool right_or_full = static_in_v; + constexpr bool is_semi_join = STRICTNESS == ASTTableJoin::Strictness::Semi; + constexpr bool is_anti_join = STRICTNESS == ASTTableJoin::Strictness::Anti; - constexpr bool need_filter = (!is_all_join && inner_or_right) && !(is_any_join && right); - constexpr bool need_replication = is_all_join || (is_any_join && right); + constexpr bool left = KIND == ASTTableJoin::Kind::Left; + constexpr bool right = KIND == ASTTableJoin::Kind::Right; + constexpr bool inner = KIND == ASTTableJoin::Kind::Inner; + constexpr bool full = KIND == ASTTableJoin::Kind::Full; + + constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right); + constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left)); /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. Columns materialized_columns; @@ -870,7 +870,7 @@ void Join::joinBlockImpl( * Because if they are constants, then in the "not joined" rows, they may have different values * - default values, which can differ from the values of these constants. 
*/ - if constexpr (right_or_full) + if constexpr (right || full) { materializeBlockInplace(block); @@ -1085,61 +1085,44 @@ void Join::joinTotals(Block & block) const } -template -struct AdderNonJoined; - template -struct AdderNonJoined -{ - static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) - { - for (size_t j = 0; j < columns_right.size(); ++j) - { - const auto & mapped_column = mapped.block->getByPosition(j).column; - columns_right[j]->insertFrom(*mapped_column, mapped.row_num); - } - - ++rows_added; - } -}; - -template -struct AdderNonJoined +struct AdderNonJoined { static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) { + constexpr bool mapped_asof = std::is_same_v; constexpr bool mapped_one = std::is_same_v || std::is_same_v; - if constexpr (!mapped_one) - AdderNonJoined::add(mapped, rows_added, columns_right); - } -}; -template -struct AdderNonJoined -{ - static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) - { - for (auto it = mapped.begin(); it.ok(); ++it) + if constexpr (mapped_asof) + { + /// Do nothing + } + else if constexpr (mapped_one) { for (size_t j = 0; j < columns_right.size(); ++j) { - const auto & mapped_column = it->block->getByPosition(j).column; - columns_right[j]->insertFrom(*mapped_column, it->row_num); + const auto & mapped_column = mapped.block->getByPosition(j).column; + columns_right[j]->insertFrom(*mapped_column, mapped.row_num); } ++rows_added; } + else + { + for (auto it = mapped.begin(); it.ok(); ++it) + { + for (size_t j = 0; j < columns_right.size(); ++j) + { + const auto & mapped_column = it->block->getByPosition(j).column; + columns_right[j]->insertFrom(*mapped_column, it->row_num); + } + + ++rows_added; + } + } } }; -template -struct AdderNonJoined -{ - static void add(const Mapped & /*mapped*/, size_t & /*rows_added*/, MutableColumns & /*columns_right*/) - { - // If we have a leftover match in the right hand 
side, not required to join because we are only support asof left/inner - } -}; /// Stream from not joined earlier rows of the right table. class NonJoinedBlockInputStream : public IBlockInputStream @@ -1348,10 +1331,11 @@ private: for (; it != end; ++it) { const Mapped & mapped = it->getMapped(); + if (mapped.getUsed()) continue; - AdderNonJoined::add(mapped, rows_added, columns_keys_and_right); + AdderNonJoined::add(mapped, rows_added, columns_keys_and_right); if (rows_added >= max_block_size) { @@ -1391,6 +1375,10 @@ private: BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const { + if (table_join->strictness() == ASTTableJoin::Strictness::Asof || + table_join->strictness() == ASTTableJoin::Strictness::Semi) + return {}; + if (isRightOrFull(table_join->kind())) return std::make_shared(*this, result_sample_block, max_block_size); return {}; diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 58ae04f25d8..11e48c6908e 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -80,11 +80,23 @@ using MappedAsof = WithFlags; * It is just a hash table: keys -> rows of joined ("right") table. * Additionally, CROSS JOIN is supported: instead of hash table, it use just set of blocks without keys. * - * JOIN-s could be of nine types: ANY/ALL × LEFT/INNER/RIGHT/FULL, and also CROSS. + * JOIN-s could be of these types: + * - ALL × LEFT/INNER/RIGHT/FULL + * - ANY × LEFT/INNER/RIGHT + * - SEMI/ANTI x LEFT/RIGHT + * - ASOF x LEFT/INNER + * - CROSS * - * If ANY is specified - then select only one row from the "right" table, (first encountered row), even if there was more matching rows. - * If ALL is specified - usual JOIN, when rows are multiplied by number of matching rows from the "right" table. - * ANY is more efficient. + * ALL means usual JOIN, when rows are multiplied by number of matching rows from the "right" table. 
+ * ANY uses one line per unique key from the right table. For LEFT JOIN it would be any row (with needed joined key) from the right table, + * for RIGHT JOIN it would be any row from the left table and for INNER one it would be any row from right and any row from left. + * SEMI JOIN filters left table by keys that are present in right table for LEFT JOIN, and filters right table by keys from left table + * for RIGHT JOIN. In other words SEMI JOIN returns only rows whose joining keys are present in another table. + * ANTI JOIN is the same as SEMI JOIN but returns rows with joining keys that are NOT present in another table. + * SEMI/ANTI JOINs allow to get values from both tables. From the filtering table it gets any row with the same joining key. For ANTI JOIN it returns + * default values for the other table's columns. + * ASOF JOIN is a non-equi join. For one key column it finds nearest value to join according to join inequality. + * It's expected that ANY|SEMI LEFT JOIN is more efficient than ALL one. * * If INNER is specified - leave only rows that have matching rows from "right" table. * If LEFT is specified - in case when there is no matching row in "right" table, fill it with default values instead. 
diff --git a/dbms/src/Interpreters/joinDispatch.h b/dbms/src/Interpreters/joinDispatch.h index e5bb644f504..593a19ab637 100644 --- a/dbms/src/Interpreters/joinDispatch.h +++ b/dbms/src/Interpreters/joinDispatch.h @@ -30,6 +30,18 @@ template <> struct MapGetter struct MapGetter { using Map = Join::MapsAllFlagged; }; template <> struct MapGetter { using Map = Join::MapsAllFlagged; }; +/// Only SEMI LEFT and SEMI RIGHT are valid +template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsAll; }; +template <> struct MapGetter { using Map = Join::MapsOne; }; + +/// Only ANTI LEFT and ANTI RIGHT are valid +template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsOne; }; +template <> struct MapGetter { using Map = Join::MapsAllFlagged; }; +template <> struct MapGetter { using Map = Join::MapsOne; }; + template struct MapGetter { @@ -37,11 +49,13 @@ struct MapGetter }; -static constexpr std::array STRICTNESSES = { +static constexpr std::array STRICTNESSES = { ASTTableJoin::Strictness::RightAny, ASTTableJoin::Strictness::Any, ASTTableJoin::Strictness::All, - ASTTableJoin::Strictness::Asof + ASTTableJoin::Strictness::Asof, + ASTTableJoin::Strictness::Semi, + ASTTableJoin::Strictness::Anti, }; static constexpr std::array KINDS = { diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp index 9fc130adb31..18750d5ccd3 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.cpp @@ -150,6 +150,12 @@ void ASTTableJoin::formatImplBeforeTable(const FormatSettings & settings, Format case Strictness::Asof: settings.ostr << "ASOF "; break; + case Strictness::Semi: + settings.ostr << "SEMI "; + break; + case Strictness::Anti: + settings.ostr << "ANTI "; + break; } } diff --git a/dbms/src/Parsers/ASTTablesInSelectQuery.h 
b/dbms/src/Parsers/ASTTablesInSelectQuery.h index 57ba6ca93f2..01c6914b46c 100644 --- a/dbms/src/Parsers/ASTTablesInSelectQuery.h +++ b/dbms/src/Parsers/ASTTablesInSelectQuery.h @@ -25,7 +25,7 @@ namespace DB * SAMPLE 1000000 * * Table expressions may be combined with JOINs of following kinds: - * [GLOBAL] [ANY|ALL|] INNER|LEFT|RIGHT|FULL [OUTER] JOIN table_expr + * [GLOBAL] [ANY|ALL|ASOF|SEMI|ANTI] [INNER|LEFT|RIGHT|FULL] [OUTER] JOIN table_expr * CROSS JOIN * , (comma) * @@ -74,10 +74,12 @@ struct ASTTableJoin : public IAST enum class Strictness { Unspecified, - RightAny, /// Right ANY. If there are many suitable rows in right table, use any from them to join. + RightAny, /// Old ANY JOIN. If there are many suitable rows in right table, use any from them to join. Any, /// Semi Join with any value from filtering table. For LEFT JOIN with Any and RightAny are the same. All, /// If there are many suitable rows to join, use all of them and replicate rows of "left" table (usual semantic of JOIN). Asof, /// For the last JOIN column, pick the latest value + Semi, /// LEFT or RIGHT. SEMI LEFT JOIN filters left table by values that exist in right table. SEMI RIGHT - vice versa. + Anti, /// LEFT or RIGHT. Same as SEMI JOIN but filters by values that do NOT exist in the other table. }; /// Join method. 
diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 89793a5042d..99dce13ef76 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -1134,11 +1134,14 @@ const char * ParserAlias::restricted_keywords[] = "INNER", "FULL", "CROSS", - "ASOF", "JOIN", "GLOBAL", "ANY", "ALL", + "ASOF", + "SEMI", + "ANTI", + "ONLY", /// YQL synonym for ANTI "ON", "USING", "PREWHERE", diff --git a/dbms/src/Parsers/ParserTablesInSelectQuery.cpp b/dbms/src/Parsers/ParserTablesInSelectQuery.cpp index 6b970b0565f..7e84925b203 100644 --- a/dbms/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/dbms/src/Parsers/ParserTablesInSelectQuery.cpp @@ -137,6 +137,10 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec table_join->strictness = ASTTableJoin::Strictness::All; else if (ParserKeyword("ASOF").ignore(pos)) table_join->strictness = ASTTableJoin::Strictness::Asof; + else if (ParserKeyword("SEMI").ignore(pos)) + table_join->strictness = ASTTableJoin::Strictness::Semi; + else if (ParserKeyword("ANTI").ignore(pos) || ParserKeyword("ONLY").ignore(pos)) + table_join->strictness = ASTTableJoin::Strictness::Anti; else table_join->strictness = ASTTableJoin::Strictness::Unspecified; @@ -153,13 +157,21 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec else { /// Use INNER by default as in another DBMS. 
- table_join->kind = ASTTableJoin::Kind::Inner; + if (table_join->strictness == ASTTableJoin::Strictness::Semi || + table_join->strictness == ASTTableJoin::Strictness::Anti) + table_join->kind = ASTTableJoin::Kind::Left; + else + table_join->kind = ASTTableJoin::Kind::Inner; } if (table_join->strictness != ASTTableJoin::Strictness::Unspecified && table_join->kind == ASTTableJoin::Kind::Cross) throw Exception("You must not specify ANY or ALL for CROSS JOIN.", ErrorCodes::SYNTAX_ERROR); + if ((table_join->strictness == ASTTableJoin::Strictness::Semi || table_join->strictness == ASTTableJoin::Strictness::Anti) && + (table_join->kind != ASTTableJoin::Kind::Left && table_join->kind != ASTTableJoin::Kind::Right)) + throw Exception("SEMI|ANTI JOIN should be LEFT or RIGHT.", ErrorCodes::SYNTAX_ERROR); + /// Optional OUTER keyword for outer joins. if (table_join->kind == ASTTableJoin::Kind::Left || table_join->kind == ASTTableJoin::Kind::Right diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index 4ec8741df2f..12444867b6b 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -343,9 +343,11 @@ private: throw Exception("New ANY join storage is not implemented yet (set any_join_distinct_right_table_keys=1 to use old one)", ErrorCodes::NOT_IMPLEMENTED); } - else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) + else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof || + STRICTNESS == ASTTableJoin::Strictness::Semi || + STRICTNESS == ASTTableJoin::Strictness::Anti) { - throw Exception("ASOF join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("ASOF|SEMI|ANTI join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED); } else for (auto ref_it = it->getMapped().begin(); ref_it.ok(); ++ref_it) diff --git a/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql 
b/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql index c22b72e4126..ce52c652df0 100644 --- a/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql +++ b/dbms/tests/queries/0_stateless/00586_removing_unused_columns_from_subquery.sql @@ -20,7 +20,7 @@ FROM learnerHash, passed - eventTime AS diff FROM statements - GLOBAL ANY INNER JOIN + GLOBAL SEMI LEFT JOIN ( SELECT learnerHash, diff --git a/dbms/tests/queries/0_stateless/00679_replace_asterisk.sql b/dbms/tests/queries/0_stateless/00679_replace_asterisk.sql index 27ff799be62..19aa939b132 100644 --- a/dbms/tests/queries/0_stateless/00679_replace_asterisk.sql +++ b/dbms/tests/queries/0_stateless/00679_replace_asterisk.sql @@ -1,7 +1,6 @@ -set any_join_distinct_right_table_keys = 1; SET joined_subquery_requires_alias = 0; SELECT * FROM (SELECT 1 AS id, 2 AS value); -SELECT * FROM (SELECT 1 AS id, 2 AS value, 3 AS A) ANY INNER JOIN (SELECT 1 AS id, 4 AS values, 5 AS D) USING id; -SELECT *, d.* FROM ( SELECT 1 AS id, 2 AS value ) ANY INNER JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id; -SELECT *, d.*, d.values FROM ( SELECT 1 AS id, 2 AS value ) ANY INNER JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id; +SELECT * FROM (SELECT 1 AS id, 2 AS value, 3 AS A) SEMI LEFT JOIN (SELECT 1 AS id, 4 AS values, 5 AS D) USING id; +SELECT *, d.* FROM ( SELECT 1 AS id, 2 AS value ) SEMI LEFT JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id; +SELECT *, d.*, d.values FROM ( SELECT 1 AS id, 2 AS value ) SEMI LEFT JOIN ( SELECT 1 AS id, 3 AS values ) AS d USING id; diff --git a/dbms/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql b/dbms/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql index abf2903d3ea..aa386829276 100644 --- a/dbms/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql +++ 
b/dbms/tests/queries/0_stateless/00721_force_by_identical_result_after_merge_zookeeper.sql @@ -12,8 +12,7 @@ SYSTEM SYNC REPLICA byte_identical_r2; ALTER TABLE byte_identical_r1 ADD COLUMN y DEFAULT rand(); OPTIMIZE TABLE byte_identical_r1 PARTITION tuple() FINAL; -SET any_join_distinct_right_table_keys = 1; -SELECT x, t1.y - t2.y FROM byte_identical_r1 t1 ANY INNER JOIN byte_identical_r2 t2 USING x ORDER BY x; +SELECT x, t1.y - t2.y FROM byte_identical_r1 t1 SEMI LEFT JOIN byte_identical_r2 t2 USING x ORDER BY x; DROP TABLE byte_identical_r1; DROP TABLE byte_identical_r2; diff --git a/dbms/tests/queries/0_stateless/00859_distinct_with_join.sql b/dbms/tests/queries/0_stateless/00859_distinct_with_join.sql index 23c41549502..4fb6f4ec046 100644 --- a/dbms/tests/queries/0_stateless/00859_distinct_with_join.sql +++ b/dbms/tests/queries/0_stateless/00859_distinct_with_join.sql @@ -1,5 +1,3 @@ -set any_join_distinct_right_table_keys = 1; - drop table if exists fooL; drop table if exists fooR; create table fooL (a Int32, v String) engine = Memory; @@ -9,7 +7,7 @@ insert into fooL select number, 'L' || toString(number) from numbers(2); insert into fooL select number, 'LL' || toString(number) from numbers(2); insert into fooR select number, 'R' || toString(number) from numbers(2); -select distinct a from fooL any join fooR using(a) order by a; +select distinct a from fooL semi left join fooR using(a) order by a; drop table fooL; drop table fooR; diff --git a/dbms/tests/queries/0_stateless/00956_join_use_nulls_with_array_column.sql b/dbms/tests/queries/0_stateless/00956_join_use_nulls_with_array_column.sql index 244e04a564a..f70bccd68fd 100644 --- a/dbms/tests/queries/0_stateless/00956_join_use_nulls_with_array_column.sql +++ b/dbms/tests/queries/0_stateless/00956_join_use_nulls_with_array_column.sql @@ -1,4 +1,3 @@ -SET any_join_distinct_right_table_keys = 1; SET join_use_nulls = 1; -SELECT number FROM system.numbers ANY INNER JOIN (SELECT number, ['test'] FROM 
system.numbers LIMIT 1) js2 USING (number) LIMIT 1; +SELECT number FROM system.numbers SEMI LEFT JOIN (SELECT number, ['test'] FROM system.numbers LIMIT 1) js2 USING (number) LIMIT 1; SELECT number FROM system.numbers ANY LEFT JOIN (SELECT number, ['test'] FROM system.numbers LIMIT 1) js2 USING (number) LIMIT 1; diff --git a/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql b/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql index 9a754d94323..7ecffd8653c 100644 --- a/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql +++ b/dbms/tests/queries/0_stateless/01009_insert_select_data_loss.sql @@ -1,5 +1,5 @@ drop table if exists tab; create table tab (x UInt64) engine = MergeTree order by tuple(); -insert into tab select number as n from numbers(20) any inner join (select number * 10 as n from numbers(2)) using(n) settings any_join_distinct_right_table_keys = 1, max_block_size = 5; +insert into tab select number as n from numbers(20) semi left join (select number * 10 as n from numbers(2)) using(n) settings max_block_size = 5; select * from tab order by x; diff --git a/dbms/tests/queries/0_stateless/01009_insert_select_nicelulu.sql b/dbms/tests/queries/0_stateless/01009_insert_select_nicelulu.sql index 90a902c352d..3fe7ec04e85 100644 --- a/dbms/tests/queries/0_stateless/01009_insert_select_nicelulu.sql +++ b/dbms/tests/queries/0_stateless/01009_insert_select_nicelulu.sql @@ -1,4 +1,3 @@ -Set any_join_distinct_right_table_keys=1; DROP TABLE IF EXISTS test_insert_t1; DROP TABLE IF EXISTS test_insert_t2; DROP TABLE IF EXISTS test_insert_t3; @@ -15,7 +14,7 @@ INSERT INTO test_insert_t2 SELECT '2019-09-01',toString(number) FROM system.numb INSERT INTO test_insert_t2 SELECT '2019-09-01',toString(number) FROM system.numbers WHERE number >=700000 limit 200; INSERT INTO test_insert_t2 SELECT '2019-09-01',toString(number) FROM system.numbers WHERE number >=900000 limit 200; -INSERT INTO test_insert_t3 SELECT '2019-09-01', uid, name, 
city FROM ( SELECT dt, uid, name, city FROM test_insert_t1 WHERE dt = '2019-09-01') t1 GLOBAL ANY INNER JOIN (SELECT uid FROM test_insert_t2 WHERE dt = '2019-09-01') t2 ON t1.uid=t2.uid; +INSERT INTO test_insert_t3 SELECT '2019-09-01', uid, name, city FROM ( SELECT dt, uid, name, city FROM test_insert_t1 WHERE dt = '2019-09-01') t1 GLOBAL SEMI LEFT JOIN (SELECT uid FROM test_insert_t2 WHERE dt = '2019-09-01') t2 ON t1.uid=t2.uid; SELECT count(*) FROM test_insert_t3; diff --git a/dbms/tests/queries/0_stateless/01031_new_any_join.reference b/dbms/tests/queries/0_stateless/01031_new_any_join.reference index f2e2119503d..1fd9a5352e3 100644 --- a/dbms/tests/queries/0_stateless/01031_new_any_join.reference +++ b/dbms/tests/queries/0_stateless/01031_new_any_join.reference @@ -5,12 +5,12 @@ any left 3 a4 0 4 a5 4 b3 any left (rev) +0 5 b6 2 a3 2 b1 2 a3 2 b2 4 a5 4 b3 4 a5 4 b4 4 a5 4 b5 -4 a5 4 b6 any inner 2 a3 2 b1 4 a5 4 b3 @@ -18,12 +18,12 @@ any inner (rev) 2 a3 2 b1 4 a5 4 b3 any right +0 5 b6 2 a3 2 b1 2 a3 2 b2 4 a5 4 b3 4 a5 4 b4 4 a5 4 b5 -4 a5 4 b6 any right (rev) 0 a1 0 1 a2 0 diff --git a/dbms/tests/queries/0_stateless/01031_new_any_join.sql b/dbms/tests/queries/0_stateless/01031_new_any_join.sql index 822895c520f..de86d8eebc5 100644 --- a/dbms/tests/queries/0_stateless/01031_new_any_join.sql +++ b/dbms/tests/queries/0_stateless/01031_new_any_join.sql @@ -5,7 +5,7 @@ CREATE TABLE t1 (x UInt32, s String) engine = Memory; CREATE TABLE t2 (x UInt32, s String) engine = Memory; INSERT INTO t1 (x, s) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5'); -INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (4, 'b6'); +INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6'); SET join_use_nulls = 0; SET any_join_distinct_right_table_keys = 0; diff --git a/dbms/tests/queries/0_stateless/01031_semi_anti_join.reference b/dbms/tests/queries/0_stateless/01031_semi_anti_join.reference new file 
mode 100644 index 00000000000..5dbe67e50b4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01031_semi_anti_join.reference @@ -0,0 +1,15 @@ +semi left +2 a3 2 b1 +4 a5 4 b3 +semi right +2 a3 2 b1 +2 a3 2 b2 +4 a5 4 b3 +4 a5 4 b4 +4 a5 4 b5 +anti left +0 a1 0 +1 a2 1 +3 a4 3 +anti right +0 5 b6 diff --git a/dbms/tests/queries/0_stateless/01031_semi_anti_join.sql b/dbms/tests/queries/0_stateless/01031_semi_anti_join.sql new file mode 100644 index 00000000000..ee3b81834df --- /dev/null +++ b/dbms/tests/queries/0_stateless/01031_semi_anti_join.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (x UInt32, s String) engine = Memory; +CREATE TABLE t2 (x UInt32, s String) engine = Memory; + +INSERT INTO t1 (x, s) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5'); +INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6'); + +SET join_use_nulls = 0; + +SELECT 'semi left'; +SELECT t1.*, t2.* FROM t1 SEMI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'semi right'; +SELECT t1.*, t2.* FROM t1 SEMI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'anti left'; +SELECT t1.*, t2.* FROM t1 ANTI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +SELECT 'anti right'; +SELECT t1.*, t2.* FROM t1 ANTI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; + +DROP TABLE t1; +DROP TABLE t2;