fix select from StorageJoin

This commit is contained in:
Artem Zuikov 2020-06-01 20:27:22 +03:00
parent 9028a25f3c
commit bfa59adbfe
3 changed files with 78 additions and 43 deletions

View File

@ -251,22 +251,26 @@ public:
, max_block_size(max_block_size_) , max_block_size(max_block_size_)
, sample_block(std::move(sample_block_)) , sample_block(std::move(sample_block_))
{ {
columns.resize(sample_block.columns());
column_indices.resize(sample_block.columns()); column_indices.resize(sample_block.columns());
column_with_null.resize(sample_block.columns());
auto & saved_block = parent.getJoinedData()->sample_block;
for (size_t i = 0; i < sample_block.columns(); ++i) for (size_t i = 0; i < sample_block.columns(); ++i)
{ {
auto & [_, type, name] = sample_block.getByPosition(i); auto & [_, type, name] = sample_block.getByPosition(i);
if (parent.right_table_keys.has(name)) if (parent.right_table_keys.has(name))
{ {
key_pos = i; key_pos = i;
column_with_null[i] = parent.right_table_keys.getByName(name).type->isNullable(); auto & column = parent.right_table_keys.getByName(name);
restored_block.insert(column);
} }
else else
{ {
auto pos = parent.sample_block_with_columns_to_add.getPositionByName(name); size_t pos = saved_block.getPositionByName(name);
column_indices[i] = pos; column_indices[i] = pos;
column_with_null[i] = !parent.sample_block_with_columns_to_add.getByPosition(pos).type->equals(*type);
auto & column = saved_block.getByPosition(pos);
restored_block.insert(column);
} }
} }
} }
@ -291,11 +295,10 @@ private:
std::shared_lock<std::shared_mutex> lock; std::shared_lock<std::shared_mutex> lock;
UInt64 max_block_size; UInt64 max_block_size;
Block sample_block; Block sample_block;
Block restored_block; /// sample_block with parent column types
ColumnNumbers column_indices; ColumnNumbers column_indices;
std::vector<bool> column_with_null;
std::optional<size_t> key_pos; std::optional<size_t> key_pos;
MutableColumns columns;
std::unique_ptr<void, std::function<void(void *)>> position; /// type erasure std::unique_ptr<void, std::function<void(void *)>> position; /// type erasure
@ -303,23 +306,7 @@ private:
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename Maps> template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename Maps>
Chunk createChunk(const Maps & maps) Chunk createChunk(const Maps & maps)
{ {
for (size_t i = 0; i < sample_block.columns(); ++i) MutableColumns columns = restored_block.cloneEmpty().mutateColumns();
{
const auto & src_col = sample_block.safeGetByPosition(i);
columns[i] = src_col.type->createColumn();
if (column_with_null[i])
{
if (key_pos == i)
{
// unwrap null key column
auto & nullable_col = assert_cast<ColumnNullable &>(*columns[i]);
columns[i] = nullable_col.getNestedColumnPtr()->assumeMutable();
}
else
// wrap non key column with null
columns[i] = makeNullable(std::move(columns[i]))->assumeMutable();
}
}
size_t rows_added = 0; size_t rows_added = 0;
@ -327,7 +314,7 @@ private:
{ {
#define M(TYPE) \ #define M(TYPE) \
case HashJoin::Type::TYPE: \ case HashJoin::Type::TYPE: \
rows_added = fillColumns<KIND, STRICTNESS>(*maps.TYPE); \ rows_added = fillColumns<KIND, STRICTNESS>(*maps.TYPE, columns); \
break; break;
APPLY_FOR_JOIN_VARIANTS_LIMITED(M) APPLY_FOR_JOIN_VARIANTS_LIMITED(M)
#undef M #undef M
@ -340,29 +327,27 @@ private:
if (!rows_added) if (!rows_added)
return {}; return {};
Columns res_columns; /// Correct nullability
res_columns.reserve(columns.size());
for (size_t i = 0; i < columns.size(); ++i) for (size_t i = 0; i < columns.size(); ++i)
if (column_with_null[i]) {
{ bool src_nullable = restored_block.getByPosition(i).type->isNullable();
if (key_pos == i) bool dst_nullable = sample_block.getByPosition(i).type->isNullable();
res_columns.emplace_back(makeNullable(std::move(columns[i])));
else
{
const auto & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
res_columns.emplace_back(makeNullable(nullable_col.getNestedColumnPtr()));
}
}
else
res_columns.emplace_back(std::move(columns[i]));
UInt64 num_rows = res_columns.at(0)->size(); if (src_nullable && !dst_nullable)
return Chunk(std::move(res_columns), num_rows); {
auto & nullable_column = assert_cast<ColumnNullable &>(*columns[i]);
columns[i] = nullable_column.getNestedColumnPtr()->assumeMutable();
}
else if (!src_nullable && dst_nullable)
columns[i] = makeNullable(std::move(columns[i]))->assumeMutable();
}
UInt64 num_rows = columns.at(0)->size();
return Chunk(std::move(columns), num_rows);
} }
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename Map> template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS, typename Map>
size_t fillColumns(const Map & map) size_t fillColumns(const Map & map, MutableColumns & columns)
{ {
size_t rows_added = 0; size_t rows_added = 0;

View File

@ -0,0 +1,8 @@
1 s 1 String String
2 s 2 String String
3 s 3 Nullable(String) String
4 s 4 String Nullable(String)
1 s 1 String String
2 s 2 String String
3 s 3 Nullable(String) String
4 s 4 String Nullable(String)

View File

@ -0,0 +1,42 @@
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t3;
DROP TABLE IF EXISTS t4;
CREATE TABLE t1 (id String, name String, value UInt32)
ENGINE = Join(ANY, LEFT, id)
SETTINGS join_use_nulls = 1;
CREATE TABLE t2 (id String, name String, value UInt32)
ENGINE = Join(ANY, LEFT, id)
SETTINGS join_use_nulls = 0;
CREATE TABLE t3 (id Nullable(String), name String, value UInt32)
ENGINE = Join(ANY, LEFT, id)
SETTINGS join_use_nulls = 1;
CREATE TABLE t4 (id String, name Nullable(String), value UInt32)
ENGINE = Join(ANY, LEFT, id)
SETTINGS join_use_nulls = 0;
insert into t1 values('1', 's', 1);
insert into t2 values('2', 's', 2);
insert into t3 values('3', 's', 3);
insert into t4 values('4', 's', 4);
select *, toTypeName(id), toTypeName(name) from t1;
select *, toTypeName(id), toTypeName(name) from t2;
select *, toTypeName(id), toTypeName(name) from t3;
select *, toTypeName(id), toTypeName(name) from t4;
SET join_use_nulls = 1;
select *, toTypeName(id), toTypeName(name) from t1;
select *, toTypeName(id), toTypeName(name) from t2;
select *, toTypeName(id), toTypeName(name) from t3;
select *, toTypeName(id), toTypeName(name) from t4;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
DROP TABLE t4;