fix key columns indexing for RIGHT and FULL joins [#CLICKHOUSE-2979]

This commit is contained in:
Alexey Zatelepin 2017-04-27 20:48:30 +03:00 committed by alexey-milovidov
parent 419b6e224a
commit aeb83f9ef7
4 changed files with 55 additions and 26 deletions

View File

@ -968,7 +968,7 @@ struct AdderNonJoined<ASTTableJoin::Strictness::All, Mapped>
class NonJoinedBlockInputStream : public IProfilingBlockInputStream class NonJoinedBlockInputStream : public IProfilingBlockInputStream
{ {
public: public:
NonJoinedBlockInputStream(const Join & parent_, Block & left_sample_block, size_t max_block_size_) NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, size_t max_block_size_)
: parent(parent_), max_block_size(max_block_size_) : parent(parent_), max_block_size(max_block_size_)
{ {
/** left_sample_block contains keys and "left" columns. /** left_sample_block contains keys and "left" columns.
@ -981,39 +981,43 @@ public:
result_sample_block = left_sample_block; result_sample_block = left_sample_block;
// std::cerr << result_sample_block.dumpStructure() << "\n"; /// Add columns from the right-side table to the block.
/// Add new columns to the block.
for (size_t i = 0; i < num_columns_right; ++i) for (size_t i = 0; i < num_columns_right; ++i)
{ {
const ColumnWithTypeAndName & src_column = parent.sample_block_with_columns_to_add.safeGetByPosition(i); const ColumnWithTypeAndName & src_column = parent.sample_block_with_columns_to_add.getByPosition(i);
ColumnWithTypeAndName new_column = src_column.cloneEmpty(); result_sample_block.insert(src_column.cloneEmpty());
result_sample_block.insert(std::move(new_column));
} }
column_numbers_left.reserve(num_columns_left); column_indices_left.reserve(num_columns_left);
column_numbers_keys_and_right.reserve(num_keys + num_columns_right); column_indices_keys_and_right.reserve(num_keys + num_columns_right);
std::vector<bool> is_key_column_in_left_block(num_keys + num_columns_left, false);
for (const std::string & key : parent.key_names_left)
{
size_t key_pos = left_sample_block.getPositionByName(key);
is_key_column_in_left_block[key_pos] = true;
/// Here we establish the mapping between key columns of the left- and right-side tables.
/// key_pos index is inserted in the position corresponding to key column in parent.blocks
/// (saved blocks of the right-side table) and points to the same key column
/// in the left_sample_block and thus in the result_sample_block.
column_indices_keys_and_right.push_back(key_pos);
}
for (size_t i = 0; i < num_keys + num_columns_left; ++i) for (size_t i = 0; i < num_keys + num_columns_left; ++i)
{ {
const String & name = left_sample_block.safeGetByPosition(i).name; if (!is_key_column_in_left_block[i])
column_indices_left.push_back(i);
auto found_key_column = std::find(parent.key_names_left.begin(), parent.key_names_left.end(), name);
if (parent.key_names_left.end() == found_key_column)
column_numbers_left.push_back(i);
else
column_numbers_keys_and_right.push_back(found_key_column - parent.key_names_left.begin());
} }
for (size_t i = 0; i < num_columns_right; ++i) for (size_t i = 0; i < num_columns_right; ++i)
column_numbers_keys_and_right.push_back(num_keys + num_columns_left + i); column_indices_keys_and_right.push_back(num_keys + num_columns_left + i);
/// If use_nulls, convert left columns to Nullable. /// If use_nulls, convert left columns to Nullable.
if (parent.use_nulls) if (parent.use_nulls)
{ {
for (size_t i = 0; i < num_columns_left; ++i) for (size_t i = 0; i < num_columns_left; ++i)
{ {
convertColumnToNullable(result_sample_block.getByPosition(column_numbers_left[i])); convertColumnToNullable(result_sample_block.getByPosition(column_indices_left[i]));
} }
} }
@ -1050,9 +1054,14 @@ private:
size_t max_block_size; size_t max_block_size;
Block result_sample_block; Block result_sample_block;
ColumnNumbers column_numbers_left; /// Indices of columns in result_sample_block that come from the left-side table (except key columns).
ColumnNumbers column_numbers_keys_and_right; ColumnNumbers column_indices_left;
/// Indices of key columns in result_sample_block or columns that come from the right-side table.
/// Order is significant: it is the same as the order of columns in the blocks of the right-side table that are saved in parent.blocks.
ColumnNumbers column_indices_keys_and_right;
/// Columns of the current output block corresponding to column_indices_left.
ColumnPlainPtrs columns_left; ColumnPlainPtrs columns_left;
/// Columns of the current output block corresponding to column_indices_keys_and_right.
ColumnPlainPtrs columns_keys_and_right; ColumnPlainPtrs columns_keys_and_right;
std::unique_ptr<void, std::function<void(void *)>> position; /// type erasure std::unique_ptr<void, std::function<void(void *)>> position; /// type erasure
@ -1063,19 +1072,19 @@ private:
{ {
Block block = result_sample_block.cloneEmpty(); Block block = result_sample_block.cloneEmpty();
size_t num_columns_left = column_numbers_left.size(); size_t num_columns_left = column_indices_left.size();
size_t num_columns_right = column_numbers_keys_and_right.size(); size_t num_columns_right = column_indices_keys_and_right.size();
for (size_t i = 0; i < num_columns_left; ++i) for (size_t i = 0; i < num_columns_left; ++i)
{ {
auto & column_with_type_and_name = block.safeGetByPosition(column_numbers_left[i]); auto & column_with_type_and_name = block.safeGetByPosition(column_indices_left[i]);
column_with_type_and_name.column = column_with_type_and_name.type->createColumn(); column_with_type_and_name.column = column_with_type_and_name.type->createColumn();
columns_left[i] = column_with_type_and_name.column.get(); columns_left[i] = column_with_type_and_name.column.get();
} }
for (size_t i = 0; i < num_columns_right; ++i) for (size_t i = 0; i < num_columns_right; ++i)
{ {
auto & column_with_type_and_name = block.safeGetByPosition(column_numbers_keys_and_right[i]); auto & column_with_type_and_name = block.safeGetByPosition(column_indices_keys_and_right[i]);
column_with_type_and_name.column = column_with_type_and_name.type->createColumn(); column_with_type_and_name.column = column_with_type_and_name.type->createColumn();
columns_keys_and_right[i] = column_with_type_and_name.column.get(); columns_keys_and_right[i] = column_with_type_and_name.column.get();
columns_keys_and_right[i]->reserve(column_with_type_and_name.column->size()); columns_keys_and_right[i]->reserve(column_with_type_and_name.column->size());

View File

@ -360,9 +360,9 @@ private:
ASTTableJoin::Kind kind; ASTTableJoin::Kind kind;
ASTTableJoin::Strictness strictness; ASTTableJoin::Strictness strictness;
/// Names of key columns (columns for equi-JOIN) in "left" table. /// Names of key columns (columns for equi-JOIN) in "left" table (in the order they appear in USING clause).
const Names key_names_left; const Names key_names_left;
/// Names of key columns (columns for equi-JOIN) in "right" table. /// Names of key columns (columns for equi-JOIN) in "right" table (in the order they appear in USING clause).
const Names key_names_right; const Names key_names_right;
/// Substitute NULLs for non-JOINed rows. /// Substitute NULLs for non-JOINed rows.
@ -387,7 +387,9 @@ private:
Sizes key_sizes; Sizes key_sizes;
/// Block with columns from the right-side table except key columns.
Block sample_block_with_columns_to_add; Block sample_block_with_columns_to_add;
/// Block with key columns in the same order they appear in the right-side table.
Block sample_block_with_keys; Block sample_block_with_keys;
Poco::Logger * log; Poco::Logger * log;

View File

@ -38,3 +38,8 @@ Hello [0,1,2]
3 3
4 4
5 5
1 2 3 aaa
2 3 4 bbb ccc
5 6 7 ddd
2 3 4 bbb ccc
5 6 7 ddd

View File

@ -9,3 +9,16 @@ SELECT k, x FROM (SELECT arrayJoin([1, 2, 3]) AS k, 'Hello' AS x) ANY RIGHT JOIN
SELECT k, y FROM (SELECT arrayJoin([1, 2, 3]) AS k, 'Hello' AS x) ANY RIGHT JOIN (SELECT range(k) AS y, arrayJoin([3, 4, 5]) AS k) USING k WHERE k < 10 ORDER BY k; SELECT k, y FROM (SELECT arrayJoin([1, 2, 3]) AS k, 'Hello' AS x) ANY RIGHT JOIN (SELECT range(k) AS y, arrayJoin([3, 4, 5]) AS k) USING k WHERE k < 10 ORDER BY k;
SELECT x, y FROM (SELECT arrayJoin([1, 2, 3]) AS k, 'Hello' AS x) ANY RIGHT JOIN (SELECT range(k) AS y, arrayJoin([3, 4, 5]) AS k) USING k WHERE k < 10 ORDER BY k; SELECT x, y FROM (SELECT arrayJoin([1, 2, 3]) AS k, 'Hello' AS x) ANY RIGHT JOIN (SELECT range(k) AS y, arrayJoin([3, 4, 5]) AS k) USING k WHERE k < 10 ORDER BY k;
SELECT k FROM (SELECT arrayJoin([1, 2, 3]) AS k, 'Hello' AS x) ANY RIGHT JOIN (SELECT range(k) AS y, arrayJoin([3, 4, 5]) AS k) USING k WHERE k < 10 ORDER BY k; SELECT k FROM (SELECT arrayJoin([1, 2, 3]) AS k, 'Hello' AS x) ANY RIGHT JOIN (SELECT range(k) AS y, arrayJoin([3, 4, 5]) AS k) USING k WHERE k < 10 ORDER BY k;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t1 (k1 UInt32, k2 UInt32, k3 UInt32, val_t1 String) ENGINE=TinyLog;
CREATE TABLE t2 (val_t2 String, k3 UInt32, k2_alias UInt32, k1 UInt32) ENGINE=TinyLog;
INSERT INTO t1 VALUES (1, 2, 3, 'aaa'), (2, 3, 4, 'bbb');
INSERT INTO t2 VALUES ('ccc', 4, 3, 2), ('ddd', 7, 6, 5);
SELECT k1, k2, k3, val_t1, val_t2 FROM (SELECT * FROM t1) ANY FULL JOIN (SELECT * FROM t2) USING (k3, k1, k2 AS k2_alias) ORDER BY k1, k2, k3;
SELECT k1, k2, k3, val_t1, val_t2 FROM (SELECT * FROM t1) ANY RIGHT JOIN (SELECT * FROM t2) USING (k3, k1, k2 AS k2_alias) ORDER BY k1, k2, k3;