diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 8883698c52b..11fb6e0ace4 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -1113,7 +1113,8 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh { for (const auto & action : actions) if (action.join && (action.join->getKind() == ASTTableJoin::Kind::Full || action.join->getKind() == ASTTableJoin::Kind::Right)) - return action.join->createStreamWithNonJoinedRows(source_header, action.join_key_names_left, max_block_size); + return action.join->createStreamWithNonJoinedRows( + source_header, action.join_key_names_left, action.columns_added_by_join, max_block_size); return {}; } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index e1215fea77d..fbebb0cc919 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -656,7 +656,7 @@ template (block, key_names_left, needed_key_names_right, sample_block_with_columns_to_add, map); + joinBlockImpl(block, key_names_left, columns_added_by_join, sample_block_with_columns_to_add, map); })) { /// Joined @@ -1034,14 +1034,12 @@ struct AdderNonJoined; template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, - size_t num_columns_left, MutableColumns & columns_left, - size_t num_columns_right, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) { - for (size_t j = 0; j < num_columns_left; ++j) + for (size_t j = 0; j < columns_left.size(); ++j) columns_left[j]->insertDefault(); - for (size_t j = 0; j < num_columns_right; ++j) + for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*mapped.block->getByPosition(j).column.get(), mapped.row_num); ++rows_added; @@ -1051,16 +1049,14 @@ struct AdderNonJoined template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, - size_t num_columns_left, MutableColumns & columns_left, - size_t num_columns_right, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) { for (auto current = &static_cast(mapped); current != nullptr; current = current->next) { - for (size_t j = 0; j < num_columns_left; ++j) + for (size_t j = 0; j < columns_left.size(); ++j) columns_left[j]->insertDefault(); - for (size_t j = 0; j < num_columns_right; ++j) + for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*current->block->getByPosition(j).column.get(), current->row_num); ++rows_added; @@ -1073,61 +1069,61 @@ struct AdderNonJoined class NonJoinedBlockInputStream : public IBlockInputStream { public: - NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, size_t max_block_size_) + NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size_) : parent(parent_), max_block_size(max_block_size_) { /** left_sample_block contains keys and "left" columns. * result_sample_block - keys, "left" columns, and "right" columns. */ + std::unordered_map key_renames; + makeResultSampleBlock(left_sample_block, key_names_left, columns_added_by_join, key_renames); + + const Block & right_sample_block = parent.sample_block_with_columns_to_add; + size_t num_keys = key_names_left.size(); size_t num_columns_left = left_sample_block.columns() - num_keys; - size_t num_columns_right = parent.sample_block_with_columns_to_add.columns(); - - result_sample_block = materializeBlock(left_sample_block); - - /// Add columns from the right-side table to the block. - for (size_t i = 0; i < num_columns_right; ++i) - { - const ColumnWithTypeAndName & src_column = parent.sample_block_with_columns_to_add.getByPosition(i); - result_sample_block.insert(src_column.cloneEmpty()); - } + size_t num_columns_right = right_sample_block.columns(); column_indices_left.reserve(num_columns_left); column_indices_keys_and_right.reserve(num_keys + num_columns_right); - std::vector is_key_column_in_left_block(num_keys + num_columns_left, false); + + std::vector is_left_key(left_sample_block.columns(), false); for (const std::string & key : key_names_left) { size_t key_pos = left_sample_block.getPositionByName(key); - is_key_column_in_left_block[key_pos] = true; + is_left_key[key_pos] = true; /// Here we establish the mapping between key columns of the left- and right-side tables. /// key_pos index is inserted in the position corresponding to key column in parent.blocks /// (saved blocks of the right-side table) and points to the same key column /// in the left_sample_block and thus in the result_sample_block. column_indices_keys_and_right.push_back(key_pos); + + auto it = key_renames.find(key); + if (it != key_renames.end()) + key_renames_indices[key_pos] = result_sample_block.getPositionByName(it->second); } - for (size_t i = 0; i < num_keys + num_columns_left; ++i) - { - if (!is_key_column_in_left_block[i]) - column_indices_left.push_back(i); - } + size_t num_src_columns = left_sample_block.columns() + right_sample_block.columns(); - for (size_t i = 0; i < num_columns_right; ++i) - column_indices_keys_and_right.push_back(num_keys + num_columns_left + i); - - /// If use_nulls, convert left columns to Nullable. - if (parent.use_nulls) + for (size_t i = 0; i < result_sample_block.columns(); ++i) { - for (size_t i = 0; i < num_columns_left; ++i) + if (i < left_sample_block.columns()) { - convertColumnToNullable(result_sample_block.getByPosition(column_indices_left[i])); - } - } + if (!is_left_key[i]) + { + column_indices_left.emplace_back(i); - columns_left.resize(num_columns_left); - columns_keys_and_right.resize(num_keys + num_columns_right); + /// If use_nulls, convert left columns to Nullable. + if (parent.use_nulls) + convertColumnToNullable(result_sample_block.getByPosition(i)); + } + } + else if (i < num_src_columns) + column_indices_keys_and_right.emplace_back(i); + } } String getName() const override { return "NonJoined"; } @@ -1159,31 +1155,49 @@ private: /// Indices of key columns in result_sample_block or columns that come from the right-side table. /// Order is significant: it is the same as the order of columns in the blocks of the right-side table that are saved in parent.blocks. ColumnNumbers column_indices_keys_and_right; - /// Columns of the current output block corresponding to column_indices_left. - MutableColumns columns_left; - /// Columns of the current output block corresponding to column_indices_keys_and_right. - MutableColumns columns_keys_and_right; + std::unordered_map key_renames_indices; std::unique_ptr> position; /// type erasure + void makeResultSampleBlock(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, std::unordered_map & key_renames) + { + const Block & right_sample_block = parent.sample_block_with_columns_to_add; + + result_sample_block = materializeBlock(left_sample_block); + + /// Add columns from the right-side table to the block. + for (size_t i = 0; i < right_sample_block.columns(); ++i) + { + const ColumnWithTypeAndName & src_column = right_sample_block.getByPosition(i); + result_sample_block.insert(src_column.cloneEmpty()); + } + + const auto & key_names_right = parent.key_names_right; + NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); + + /// Add join key columns from right block if they has different name. + for (size_t i = 0; i < key_names_right.size(); ++i) + { + auto & right_name = key_names_right[i]; + auto & left_name = key_names_left[i]; + + if (needed_key_names_right.count(right_name) && !result_sample_block.has(right_name)) + { + const auto & col = result_sample_block.getByName(left_name); + result_sample_block.insert({col.column, col.type, right_name}); + + key_renames[left_name] = right_name; + } + } + } + template Block createBlock(const Maps & maps) { - size_t num_columns_left = column_indices_left.size(); - size_t num_columns_right = column_indices_keys_and_right.size(); - - for (size_t i = 0; i < num_columns_left; ++i) - { - const auto & src_col = result_sample_block.safeGetByPosition(column_indices_left[i]); - columns_left[i] = src_col.type->createColumn(); - } - - for (size_t i = 0; i < num_columns_right; ++i) - { - const auto & src_col = result_sample_block.safeGetByPosition(column_indices_keys_and_right[i]); - columns_keys_and_right[i] = src_col.type->createColumn(); - } + MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left); + MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right); size_t rows_added = 0; @@ -1191,7 +1205,7 @@ private: { #define M(TYPE) \ case Join::Type::TYPE: \ - rows_added = fillColumns(*maps.TYPE); \ + rows_added = fillColumns(*maps.TYPE, columns_left, columns_keys_and_right); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -1204,21 +1218,56 @@ private: return {}; Block res = result_sample_block.cloneEmpty(); - for (size_t i = 0; i < num_columns_left; ++i) + + for (size_t i = 0; i < columns_left.size(); ++i) res.getByPosition(column_indices_left[i]).column = std::move(columns_left[i]); - for (size_t i = 0; i < num_columns_right; ++i) - res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]); + + if (key_renames_indices.empty()) + { + for (size_t i = 0; i < columns_keys_and_right.size(); ++i) + res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]); + } + else + { + for (size_t i = 0; i < columns_keys_and_right.size(); ++i) + { + size_t key_idx = column_indices_keys_and_right[i]; + + auto it = key_renames_indices.find(key_idx); + if (it != key_renames_indices.end()) + { + auto & key_column = res.getByPosition(key_idx).column; + if (key_column->empty()) + key_column = key_column->cloneResized(columns_keys_and_right[i]->size()); + res.getByPosition(it->second).column = std::move(columns_keys_and_right[i]); + } + else + res.getByPosition(key_idx).column = std::move(columns_keys_and_right[i]); + } + } return res; } + static MutableColumns columnsForIndex(const Block & block, const ColumnNumbers & indices) + { + size_t num_columns = indices.size(); + + MutableColumns columns; + columns.resize(num_columns); + + for (size_t i = 0; i < num_columns; ++i) + { + const auto & src_col = block.safeGetByPosition(indices[i]); + columns[i] = src_col.type->createColumn(); + } + + return columns; + } template - size_t fillColumns(const Map & map) + size_t fillColumns(const Map & map, MutableColumns & columns_left, MutableColumns & columns_keys_and_right) { - size_t num_columns_left = column_indices_left.size(); - size_t num_columns_right = column_indices_keys_and_right.size(); - size_t rows_added = 0; if (!position) @@ -1234,7 +1283,7 @@ private: if (it->second.getUsed()) continue; - AdderNonJoined::add(it->second, rows_added, num_columns_left, columns_left, num_columns_right, columns_keys_and_right); + AdderNonJoined::add(it->second, rows_added, columns_left, columns_keys_and_right); if (rows_added >= max_block_size) { @@ -1248,9 +1297,10 @@ private: }; -BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const +BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size) const { - return std::make_shared(*this, left_sample_block, key_names_left, max_block_size); + return std::make_shared(*this, left_sample_block, key_names_left, columns_added_by_join, max_block_size); } diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 233aca7d1d1..04e9364605b 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -260,7 +260,8 @@ public: * Use only after all calls to joinBlock was done. * left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside). */ - BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, size_t max_block_size) const; + BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left, + const NamesAndTypesList & columns_added_by_join, size_t max_block_size) const; /// Number of keys in all built JOIN maps. size_t getTotalRowCount() const; @@ -510,7 +511,7 @@ private: void joinBlockImpl( Block & block, const Names & key_names_left, - const NameSet & needed_key_names_right, + const NamesAndTypesList & columns_added_by_join, const Block & block_with_columns_to_add, const Maps & maps) const; diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference index 1b418788edf..9be72373625 100644 --- a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference @@ -64,3 +64,55 @@ left expr 5 G 0 8 H 0 9 I 9 i +right +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +right subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +full +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +full subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql index ce47b0ca7a5..e259b78445d 100644 --- a/dbms/tests/queries/0_stateless/00702_join_on_dups.sql +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.sql @@ -22,17 +22,17 @@ select s.*, j.* from (select * from X) as s left join (select * from Y) as j on select 'left expr'; select X.*, Y.* from X left join Y on (X.id + 1) = (Y.id + 1); ---select 'right'; ---select X.*, Y.* from X right join Y on X.id = Y.id order by id; ---select 'right subs'; ---select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by id; +select 'right'; +select X.*, Y.* from X right join Y on X.id = Y.id order by id; +select 'right subs'; +select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by id; --select 'right expr'; --select X.*, Y.* from X right join Y on (X.id + 1) = (Y.id + 1) order by id; ---select 'full'; ---select X.*, Y.* from X full join Y on X.id = Y.id order by id; ---select 'full subs'; ---select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by id; +select 'full'; +select X.*, Y.* from X full join Y on X.id = Y.id order by id; +select 'full subs'; +select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by id; --select 'full expr'; --select X.*, Y.* from X full join Y on (X.id + 1) = (Y.id + 1) order by id; diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference index a66da2378e3..13928b0473c 100644 --- a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.reference @@ -42,3 +42,55 @@ left subs 5 G 0 8 H 0 9 I 9 i +right +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +right subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +9 I 9 i +full +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i +full subs +0 6 g +0 7 h +1 A 1 a +1 A 1 b +2 B 2 c +2 C 2 c +3 D 3 d +3 D 3 e +4 E 4 f +4 F 4 f +5 G 0 +8 H 0 +9 I 9 i diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql index 59fac694c0d..4f68381c28f 100644 --- a/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql +++ b/dbms/tests/queries/0_stateless/00702_join_with_using_dups.sql @@ -18,15 +18,15 @@ select X.*, Y.* from X left join Y using id; select 'left subs'; select s.*, j.* from (select * from X) as s left join (select * from Y) as j using id; ---select 'right'; ---select X.*, Y.* from X right join Y using id order by id; ---select 'right subs'; ---select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by id; +select 'right'; +select X.*, Y.* from X right join Y using id order by id; +select 'right subs'; +select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by id; ---select 'full'; ---select X.*, Y.* from X full join Y using id order by id; ---select 'full subs'; ---select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by id; +select 'full'; +select X.*, Y.* from X full join Y using id order by id; +select 'full subs'; +select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by id; drop table X; drop table Y; diff --git a/dbms/tests/queries/0_stateless/00722_inner_join.reference b/dbms/tests/queries/0_stateless/00722_inner_join.reference index 9fdac0e26a1..c482ca7ba9d 100644 --- a/dbms/tests/queries/0_stateless/00722_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00722_inner_join.reference @@ -21,6 +21,8 @@ └──────────┴──────┘ one system one +system one test one 2 2 +2 diff --git a/dbms/tests/queries/0_stateless/00722_inner_join.sql b/dbms/tests/queries/0_stateless/00722_inner_join.sql index 9d9c4c48d4e..0c544b12ab9 100644 --- a/dbms/tests/queries/0_stateless/00722_inner_join.sql +++ b/dbms/tests/queries/0_stateless/00722_inner_join.sql @@ -58,10 +58,10 @@ SELECT t.name --, db.name FROM (SELECT name, database FROM system.tables WHERE name = 'one') AS t JOIN (SELECT name FROM system.databases WHERE name = 'system') AS db ON t.database = db.name; ---SELECT db.name, t.name --- FROM system.tables AS t --- JOIN (SELECT * FROM system.databases WHERE name = 'system') AS db ON t.database = db.name --- WHERE t.name = 'one'; +SELECT db.name, t.name + FROM system.tables AS t + JOIN (SELECT * FROM system.databases WHERE name = 'system') AS db ON t.database = db.name + WHERE t.name = 'one'; SELECT database, t.name FROM system.tables AS t @@ -72,10 +72,10 @@ SELECT count(t.database) FROM (SELECT * FROM system.tables WHERE name = 'one') AS t JOIN system.databases AS db ON t.database = db.name; ---SELECT count(db.name) --- FROM system.tables AS t --- JOIN system.databases AS db ON t.database = db.name --- WHERE t.name = 'one'; +SELECT count(db.name) + FROM system.tables AS t + JOIN system.databases AS db ON t.database = db.name + WHERE t.name = 'one'; SELECT count() FROM system.tables AS t