fix join_use_nulls results for key columns

This commit is contained in:
chertus 2019-07-31 23:19:52 +03:00
parent f402ae5092
commit 10edd76fe4
8 changed files with 211 additions and 122 deletions

View File

@ -334,13 +334,11 @@ void Join::setSampleBlock(const Block & block)
prepareBlockListStructure(blocklist_sample);
/// Move from `sample_block_with_columns_to_add` key columns to `sample_block_with_keys`, keeping the order.
size_t pos = 0;
while (pos < sample_block_with_columns_to_add.columns())
for (size_t pos = 0; pos < sample_block_with_columns_to_add.columns();)
{
const auto & name = sample_block_with_columns_to_add.getByPosition(pos).name;
if (key_names_right.end() != std::find(key_names_right.begin(), key_names_right.end(), name))
auto & col = sample_block_with_columns_to_add.getByPosition(pos);
if (key_names_right.end() != std::find(key_names_right.begin(), key_names_right.end(), col.name))
{
auto & col = sample_block_with_columns_to_add.getByPosition(pos);
col.column = recursiveRemoveLowCardinality(col.column);
col.type = recursiveRemoveLowCardinality(col.type);
sample_block_with_keys.insert(col);
@ -824,13 +822,8 @@ void Join::joinBlockImpl(
for (size_t i = 0; i < existing_columns; ++i)
{
block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst();
/// If use_nulls, convert left columns (except keys) to Nullable.
if (use_nulls)
{
if (std::end(key_names_left) == std::find(key_names_left.begin(), key_names_left.end(), block.getByPosition(i).name))
convertColumnToNullable(block.getByPosition(i));
}
convertColumnToNullable(block.getByPosition(i));
}
}
@ -855,7 +848,34 @@ void Join::joinBlockImpl(
/// Filter & insert missing rows
auto right_keys = requiredRightKeys(key_names_right, columns_added_by_join);
if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any || STRICTNESS == ASTTableJoin::Strictness::Asof)
constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All;
constexpr bool inner_or_right = static_in_v<KIND, ASTTableJoin::Kind::Inner, ASTTableJoin::Kind::Right>;
constexpr bool left_or_full = static_in_v<KIND, ASTTableJoin::Kind::Left, ASTTableJoin::Kind::Full>;
std::vector<size_t> right_keys_to_replicate [[maybe_unused]];
if constexpr (!is_all_join && inner_or_right)
{
/// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones.
for (size_t i = 0; i < existing_columns; ++i)
block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(row_filter, -1);
/// Add join key columns from the right block if they have different names.
for (size_t i = 0; i < key_names_right.size(); ++i)
{
auto & right_name = key_names_right[i];
auto & left_name = key_names_left[i];
auto it = right_keys.find(right_name);
if (it != right_keys.end() && !block.has(right_name))
{
const auto & col = block.getByName(left_name);
bool is_nullable = it->second->isNullable();
block.insert(correctNullability({col.column, col.type, right_name}, is_nullable));
}
}
}
else
{
/// Some trash to represent IColumn::Filter as ColumnUInt8 needed for ColumnNullable::applyNullMap()
auto null_map_filter_ptr = ColumnUInt8::create();
@ -863,63 +883,6 @@ void Join::joinBlockImpl(
null_map_filter.getData().swap(row_filter);
const IColumn::Filter & filter = null_map_filter.getData();
constexpr bool inner_or_right = static_in_v<KIND, ASTTableJoin::Kind::Inner, ASTTableJoin::Kind::Right>;
if constexpr (inner_or_right)
{
/// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones.
for (size_t i = 0; i < existing_columns; ++i)
block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(filter, -1);
/// Add join key columns from the right block if they have different names.
for (size_t i = 0; i < key_names_right.size(); ++i)
{
auto & right_name = key_names_right[i];
auto & left_name = key_names_left[i];
auto it = right_keys.find(right_name);
if (it != right_keys.end() && !block.has(right_name))
{
const auto & col = block.getByName(left_name);
bool is_nullable = it->second->isNullable();
block.insert(correctNullability({col.column, col.type, right_name}, is_nullable));
}
}
}
else
{
/// Add join key columns from the right block if they have different names.
for (size_t i = 0; i < key_names_right.size(); ++i)
{
auto & right_name = key_names_right[i];
auto & left_name = key_names_left[i];
auto it = right_keys.find(right_name);
if (it != right_keys.end() && !block.has(right_name))
{
const auto & col = block.getByName(left_name);
ColumnPtr column = col.column->convertToFullColumnIfConst();
MutableColumnPtr mut_column = column->cloneEmpty();
for (size_t row = 0; row < filter.size(); ++row)
{
if (filter[row])
mut_column->insertFrom(*column, row);
else
mut_column->insertDefault();
}
bool is_nullable = use_nulls || it->second->isNullable();
block.insert(correctNullability({std::move(mut_column), col.type, right_name}, is_nullable, null_map_filter));
}
}
}
}
else
{
constexpr bool left_or_full = static_in_v<KIND, ASTTableJoin::Kind::Left, ASTTableJoin::Kind::Full>;
if (!offsets_to_replicate)
throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR);
/// Add join key columns from the right block if they have different names.
for (size_t i = 0; i < key_names_right.size(); ++i)
{
@ -932,31 +895,37 @@ void Join::joinBlockImpl(
const auto & col = block.getByName(left_name);
ColumnPtr column = col.column->convertToFullColumnIfConst();
MutableColumnPtr mut_column = column->cloneEmpty();
mut_column->reserve(column->size());
size_t last_offset = 0;
for (size_t row = 0; row < column->size(); ++row)
for (size_t row = 0; row < filter.size(); ++row)
{
if (size_t to_insert = (*offsets_to_replicate)[row] - last_offset)
{
if (!row_filter[row])
mut_column->insertDefault();
else
for (size_t dup = 0; dup < to_insert; ++dup)
mut_column->insertFrom(*column, row);
}
last_offset = (*offsets_to_replicate)[row];
if (filter[row])
mut_column->insertFrom(*column, row);
else
mut_column->insertDefault();
}
/// TODO: null_map_filter
bool is_nullable = (use_nulls && left_or_full) || it->second->isNullable();
block.insert(correctNullability({std::move(mut_column), col.type, right_name}, is_nullable));
block.insert(correctNullability({std::move(mut_column), col.type, right_name}, is_nullable, null_map_filter));
if constexpr (is_all_join)
right_keys_to_replicate.push_back(block.getPositionByName(right_name));
}
}
}
if constexpr (is_all_join)
{
if (!offsets_to_replicate)
throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR);
/// If ALL ... JOIN - we replicate all the columns except the new ones.
for (size_t i = 0; i < existing_columns; ++i)
block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->replicate(*offsets_to_replicate);
/// Replicate additional right keys
for (size_t pos : right_keys_to_replicate)
block.safeGetByPosition(pos).column = block.safeGetByPosition(pos).column->replicate(*offsets_to_replicate);
}
}
@ -1142,7 +1111,14 @@ struct AdderNonJoined<ASTTableJoin::Strictness::Any, Mapped>
static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right)
{
for (size_t j = 0; j < columns_right.size(); ++j)
columns_right[j]->insertFrom(*mapped.block->getByPosition(j).column.get(), mapped.row_num);
{
const auto & mapped_column = mapped.block->getByPosition(j).column;
#ifndef NDEBUG
if (columns_right[j]->isNullable() != mapped_column->isNullable())
throw Exception("Wrong columns nullability", ErrorCodes::LOGICAL_ERROR);
#endif
columns_right[j]->insertFrom(*mapped_column, mapped.row_num);
}
++rows_added;
}
@ -1156,7 +1132,14 @@ struct AdderNonJoined<ASTTableJoin::Strictness::All, Mapped>
for (auto it = mapped.begin(); it.ok(); ++it)
{
for (size_t j = 0; j < columns_right.size(); ++j)
columns_right[j]->insertFrom(*it->block->getByPosition(j).column.get(), it->row_num);
{
const auto & mapped_column = it->block->getByPosition(j).column;
#ifndef NDEBUG
if (columns_right[j]->isNullable() != mapped_column->isNullable())
throw Exception("Wrong columns nullability", ErrorCodes::LOGICAL_ERROR);
#endif
columns_right[j]->insertFrom(*mapped_column, it->row_num);
}
++rows_added;
}
@ -1200,7 +1183,7 @@ public:
std::unordered_map<size_t, size_t> left_to_right_key_map;
makeResultSampleBlock(left_sample_block, right_sample_block, columns_added_by_join,
key_positions_left, is_left_key, left_to_right_key_map);
key_positions_left, left_to_right_key_map);
auto nullability_changes = getNullabilityChanges(parent.sample_block_with_keys, result_sample_block,
key_positions_left, left_to_right_key_map);
@ -1269,18 +1252,15 @@ private:
void makeResultSampleBlock(const Block & left_sample_block, const Block & right_sample_block,
const NamesAndTypesList & columns_added_by_join,
const std::vector<size_t> & key_positions_left, const std::vector<bool> & is_left_key,
const std::vector<size_t> & key_positions_left,
std::unordered_map<size_t, size_t> & left_to_right_key_map)
{
result_sample_block = materializeBlock(left_sample_block);
/// Convert left columns to Nullable if allowed
if (parent.use_nulls)
{
for (size_t i = 0; i < result_sample_block.columns(); ++i)
if (!is_left_key[i])
convertColumnToNullable(result_sample_block.getByPosition(i));
}
convertColumnToNullable(result_sample_block.getByPosition(i));
/// Add columns from the right-side table to the block.
for (size_t i = 0; i < right_sample_block.columns(); ++i)
@ -1455,7 +1435,7 @@ private:
const auto & dst = out_block.getByPosition(key_pos).column;
const auto & src = sample_block_with_keys.getByPosition(i).column;
if (isColumnNullable(*dst) != isColumnNullable(*src))
if (dst->isNullable() != src->isNullable())
nullability_changes.insert(key_pos);
}

View File

@ -178,15 +178,15 @@ self left nullable vs not nullable
3 l4 4 2 l3 3
4 l5 \N 3 l4 4
4 l6 \N 3 l4 4
5 l7 \N 0 0
8 l8 \N 0 0
9 l9 \N 0 0
5 l7 \N 0 \N
8 l8 \N 0 \N
9 l9 \N 0 \N
self left nullable vs not nullable 2
1 r1 \N 0
1 r2 \N 0
2 r3 \N 0
3 r4 \N 0
3 r5 \N 0
1 r1 \N 0 \N
1 r2 \N 0 \N
2 r3 \N 0 \N
3 r4 \N 0 \N
3 r5 \N 0 \N
4 r6 nr6 4 r6 nr6
6 r7 nr7 6 r7 nr7
7 r8 nr8 7 r8 nr8
@ -268,6 +268,6 @@ self full nullable vs not nullable
3 l4 4 2 l3 3
4 l5 \N 3 l4 4
4 l6 \N 3 l4 4
5 l7 \N 0 0
8 l8 \N 0 0
9 l9 \N 0 0
5 l7 \N 0 \N
8 l8 \N 0 \N
9 l9 \N 0 \N

View File

@ -3,12 +3,12 @@ l \N \N String Nullable(String)
l \N \N String Nullable(String)
r \N String Nullable(String)
\N r \N Nullable(String) Nullable(String)
l \N String Nullable(String)
l \N String Nullable(String)
l \N \N String Nullable(String)
l \N \N String Nullable(String)
r \N String Nullable(String)
\N r \N Nullable(String) Nullable(String)
\N \N
0 \N
\N \N
using
l \N String Nullable(String)
l \N String Nullable(String)
@ -19,26 +19,26 @@ l \N String Nullable(String)
\N String Nullable(String)
\N \N Nullable(String) Nullable(String)
\N \N
0 \N
\N \N
on + join_use_nulls
l \N \N TODO Nullable(String)
l \N \N TODO Nullable(String)
r \N TODO Nullable(String)
\N r \N TODO Nullable(String)
\N r \N Nullable(String) Nullable(String)
l \N TODO Nullable(String)
l \N TODO Nullable(String)
r \N TODO Nullable(String)
l \N \N TODO Nullable(String)
l \N \N TODO Nullable(String)
\N r \N TODO Nullable(String)
\N r \N Nullable(String) Nullable(String)
\N \N
0 \N
\N \N
using + join_use_nulls
l \N TODO Nullable(String)
l \N TODO Nullable(String)
\N TODO Nullable(String)
\N \N TODO Nullable(String)
\N \N Nullable(String) Nullable(String)
l \N TODO Nullable(String)
l \N TODO Nullable(String)
\N TODO Nullable(String)
\N \N TODO Nullable(String)
\N \N Nullable(String) Nullable(String)
\N \N
0 \N
\N \N

View File

@ -6,7 +6,7 @@ foo \N 2 0 Nullable(String) Nullable(String)
bar bar 1 2 Nullable(String) Nullable(String)
\N 0 1 Nullable(String) Nullable(String)
foo \N 2 0 Nullable(String) Nullable(String)
foo 2 0 String Nullable(String)
foo \N 2 0 String Nullable(String)
bar bar 1 2 String Nullable(String)
test 0 1 String Nullable(String)
\N 0 1 String Nullable(String)

View File

@ -9,13 +9,25 @@ join_use_nulls = 1
1 1
2 2
-
1 1 1 1
2 2 \N \N
-
1 1 1 1
2 2 \N \N
-
1 1 1 1
-
2 2
-
2 2 \N \N
-
\N \N
-
1 1 \N \N
2 2 \N \N
-
1 1 1 1
2 2 \N \N
-
join_use_nulls = 0
1 1
@ -38,6 +50,7 @@ join_use_nulls = 0
-
-
-
-
1 1 0 0
2 2 0 0
-

View File

@ -18,19 +18,21 @@ select * from t join s on (t.a=s.a and t.b=s.b);
select '-';
select t.* from t left join s on (t.a=s.a and t.b=s.b) order by t.a;
select '-';
-- select t.*, s.* from t left join s on (t.a=s.a and t.b=s.b); -- TODO
select t.*, s.* from t left join s on (t.a=s.a and t.b=s.b) order by t.a;
select '-';
-- select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b); -- TODO
select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b) order by t.a;
select '-';
select t.*, s.* from t right join s on (t.a=s.a and t.b=s.b);
select '-';
-- select * from t left outer join s using (a,b) where s.a is null; -- TODO
select * from t left outer join s using (a,b) where s.a is null;
select '-';
-- select * from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; -- TODO
select * from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null;
select '-';
-- select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- TODO
select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null;
select '-';
-- select t.*, s.* from t left join s on (s.a=t.a); -- TODO
select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a;
select '-';
select t.*, s.* from t left join s on (s.a=t.a) order by t.a;
select '-';
select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2;
@ -54,6 +56,8 @@ select '-';
select '-';
-- select * from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; -- TODO
select '-';
-- select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; -- TODO
select '-';
select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a;
select '-';
select t.*, s.* from t left join s on (s.a=t.a) order by t.a;

View File

@ -0,0 +1,20 @@
1 1 a 0 0
0 0 2 2 a
1 1 a \N \N \N
\N \N \N 2 2 a
1 1 a 0 0 \N
0 0 \N 2 2 a
1 1 a \N \N \N
\N \N \N 2 2 a
1 1 a 0 \N
0 \N 2 2 a
1 1 a \N \N \N
\N \N \N 2 2 a
1 1 a 0 \N \N
0 \N \N 2 2 a
1 1 a \N \N \N
\N \N \N 2 2 a
1 1 a \N \N \N
\N \N \N 2 2 a
1 1 a \N \N \N
\N \N \N 2 2 a

View File

@ -0,0 +1,72 @@
-- Test script: LEFT and RIGHT joins on the key pair (a, b) between two
-- single-row tables whose rows never match (t holds (1,1,'a'), s holds (2,2,'a')).
-- Each case below recreates t and s with a different mix of Nullable columns,
-- then runs the joins twice: first without a SETTINGS clause, then with
-- join_use_nulls = 1.
drop table if exists t;
drop table if exists s;
-- Case 1: no Nullable columns.
create table t(a Int64, b Int64, c String) engine = Memory;
create table s(a Int64, b Int64, c String) engine = Memory;
insert into t values(1,1,'a');
insert into s values(2,2,'a');
select t.*, s.a, s.b, s.c from t left join s on (s.a = t.a and s.b = t.b);
select t.*, s.a, s.b, s.c from t right join s on (s.a = t.a and s.b = t.b);
select t.*, s.a, s.b, s.c from t left join s on (s.a = t.a and s.b = t.b) SETTINGS join_use_nulls = 1;
select t.*, s.a, s.b, s.c from t right join s on (s.a = t.a and s.b = t.b) SETTINGS join_use_nulls = 1;
drop table t;
drop table s;
-- Case 2: only the non-key column c is Nullable.
create table t(a Int64, b Int64, c Nullable(String)) engine = Memory;
create table s(a Int64, b Int64, c Nullable(String)) engine = Memory;
insert into t values(1,1,'a');
insert into s values(2,2,'a');
select * from t left join s on (s.a = t.a and s.b = t.b);
select * from t right join s on (s.a = t.a and s.b = t.b);
select t.*, s.* from t left join s on (s.a = t.a and s.b = t.b) SETTINGS join_use_nulls = 1;
select t.*, s.* from t right join s on (s.a = t.a and s.b = t.b) SETTINGS join_use_nulls = 1;
drop table t;
drop table s;
-- Case 3: only the key column b is Nullable.
create table t(a Int64, b Nullable(Int64), c String) engine = Memory;
create table s(a Int64, b Nullable(Int64), c String) engine = Memory;
insert into t values(1,1,'a');
insert into s values(2,2,'a');
select t.*, s.* from t left join s on (s.a = t.a and s.b = t.b);
select t.*, s.* from t right join s on (s.a = t.a and s.b = t.b);
select * from t left join s on (s.a = t.a and s.b = t.b) SETTINGS join_use_nulls = 1;
select * from t right join s on (s.a = t.a and s.b = t.b) SETTINGS join_use_nulls = 1;
drop table t;
drop table s;
-- Case 4: key column b and non-key column c are Nullable; key column a is not.
create table t(a Int64, b Nullable(Int64), c Nullable(String)) engine = Memory;
create table s(a Int64, b Nullable(Int64), c Nullable(String)) engine = Memory;
insert into t values(1,1,'a');
insert into s values(2,2,'a');
select t.*, s.a, s.b, s.c from t left join s on (s.a = t.a and s.b = t.b);
select t.*, s.a, s.b, s.c from t right join s on (s.a = t.a and s.b = t.b);
select * from t left join s on (s.a = t.a and s.b = t.b) SETTINGS join_use_nulls = 1;
select * from t right join s on (s.a = t.a and s.b = t.b) SETTINGS join_use_nulls = 1;
drop table t;
drop table s;
-- Case 5: every column is Nullable.
create table t(a Nullable(Int64), b Nullable(Int64), c Nullable(String)) engine = Memory;
create table s(a Nullable(Int64), b Nullable(Int64), c Nullable(String)) engine = Memory;
insert into t values(1,1,'a');
insert into s values(2,2,'a');
select * from t left join s on (s.a = t.a and s.b = t.b);
select * from t right join s on (s.a = t.a and s.b = t.b);
select t.*, s.a, s.b, s.c from t left join s on (s.a = t.a and s.b = t.b) SETTINGS join_use_nulls = 1;
select t.*, s.a, s.b, s.c from t right join s on (s.a = t.a and s.b = t.b) SETTINGS join_use_nulls = 1;
drop table t;
drop table s;