working version

This commit is contained in:
Artem Zuikov 2020-04-13 20:03:11 +03:00
parent 12ca9f5912
commit 0b7ae2c2bf
7 changed files with 231 additions and 32 deletions

View File

@ -295,17 +295,22 @@ public:
: key_columns(key_columns_)
{}
/// Returns the lookup result for one row of the key column.
/// NOTE(review): this listing is a rendered diff without +/- markers. The two signatures below and
/// the two `result.row_num` / `return` pairs at the end look like the pre- and post-change variants
/// of the same lines (reader + `i` versus table_join + `row`) - confirm against the real file.
FindResult findKey(const DictionaryReader & reader, size_t i, const Arena &)
FindResult findKey(const TableJoin & table_join, size_t row, const Arena &)
{
const DictionaryReader & reader = *table_join.dictionary_reader;
/// The read happens only while read_result tests false (presumably: nothing has been read yet),
/// so later calls reuse read_result, found and positions.
if (!read_result)
{
/// A single call covers the whole first key column (its size is passed as the row count);
/// read_result, found and positions are presumably filled by readKeys.
reader.readKeys(*key_columns[0], key_columns[0]->size(), read_result, found, positions);
result.block = &read_result;
/// TODO: check types and correct nullability
/// Convert the columns that were read to Nullable where the join settings ask for it.
if (table_join.forceNullableRight())
for (auto & column : read_result)
if (table_join.rightBecomeNullable(column.type))
JoinCommon::convertColumnToNullable(column);
}
/// Per-row answer: position taken from positions[], found flag taken from found[].
result.row_num = positions[i];
return FindResult(&result, found[i]);
result.row_num = positions[row];
return FindResult(&result, found[row]);
}
private:
@ -985,14 +990,14 @@ IColumn::Filter switchJoinRightColumns(const Maps & maps_, AddedColumns & added_
}
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS>
IColumn::Filter dictionaryJoinRightColumns(const DictionaryReader & reader, AddedColumns & added_columns, const ConstNullMapPtr & null_map)
IColumn::Filter dictionaryJoinRightColumns(const TableJoin & table_join, AddedColumns & added_columns, const ConstNullMapPtr & null_map)
{
if constexpr (KIND == ASTTableJoin::Kind::Left &&
(STRICTNESS == ASTTableJoin::Strictness::Any ||
STRICTNESS == ASTTableJoin::Strictness::Semi ||
STRICTNESS == ASTTableJoin::Strictness::Anti))
{
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetterForDict>(reader, added_columns, null_map);
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetterForDict>(table_join, added_columns, null_map);
}
throw Exception("Logical error: wrong JOIN combination", ErrorCodes::LOGICAL_ERROR);
@ -1059,7 +1064,7 @@ void HashJoin::joinBlockImpl(
added_columns.need_filter = need_filter || has_required_right_keys;
IColumn::Filter row_filter = overDictionary() ?
dictionaryJoinRightColumns<KIND, STRICTNESS>(*table_join->dictionary_reader, added_columns, null_map) :
dictionaryJoinRightColumns<KIND, STRICTNESS>(*table_join, added_columns, null_map) :
switchJoinRightColumns<KIND, STRICTNESS>(maps_, added_columns, data->type, null_map);
for (size_t i = 0; i < added_columns.size(); ++i)

View File

@ -155,22 +155,26 @@ NamesWithAliases TableJoin::getRequiredColumns(const Block & sample, const Names
return getNamesWithAliases(required_columns);
}
/// Tells whether a left-side column of the given type is turned into Nullable by this join:
/// forceNullableLeft() must hold and the type must be allowed inside Nullable.
bool TableJoin::leftBecomeNullable(const DataTypePtr & column_type) const
{
    if (!forceNullableLeft())
        return false;
    return column_type->canBeInsideNullable();
}
/// Tells whether a right-side column of the given type is turned into Nullable by this join:
/// forceNullableRight() must hold and the type must be allowed inside Nullable.
bool TableJoin::rightBecomeNullable(const DataTypePtr & column_type) const
{
    if (!forceNullableRight())
        return false;
    return column_type->canBeInsideNullable();
}
/// Appends a joined column to columns_added_by_join, with its type wrapped by makeNullable
/// when the nullability condition holds and unchanged otherwise.
/// NOTE(review): this listing is a rendered diff without +/- markers. The braced
/// `join_use_nulls && isLeftOrFull(...)` branch looks like the pre-change form and the
/// `rightBecomeNullable(...)` branch like its replacement; read literally, both would append
/// the column - confirm against the real file.
void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column)
{
if (join_use_nulls && isLeftOrFull(table_join.kind))
{
/// Types that cannot be inside Nullable are kept as they are.
auto type = joined_column.type->canBeInsideNullable() ? makeNullable(joined_column.type) : joined_column.type;
columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, std::move(type)));
}
if (rightBecomeNullable(joined_column.type))
columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, makeNullable(joined_column.type)));
else
columns_added_by_join.push_back(joined_column);
}
void TableJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) const
{
bool right_or_full_join = isRightOrFull(table_join.kind);
bool left_or_full_join = isLeftOrFull(table_join.kind);
for (auto & col : sample_block)
{
/// Materialize column.
@ -179,9 +183,7 @@ void TableJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) cons
if (col.column)
col.column = nullptr;
bool make_nullable = join_use_nulls && right_or_full_join;
if (make_nullable && col.type->canBeInsideNullable())
if (leftBecomeNullable(col.type))
col.type = makeNullable(col.type);
}
@ -189,9 +191,7 @@ void TableJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) cons
{
auto res_type = col.type;
bool make_nullable = join_use_nulls && left_or_full_join;
if (make_nullable && res_type->canBeInsideNullable())
if (rightBecomeNullable(res_type))
res_type = makeNullable(res_type);
sample_block.insert(ColumnWithTypeAndName(nullptr, res_type, col.name));
@ -240,6 +240,10 @@ bool TableJoin::allowMergeJoin() const
bool TableJoin::allowDictJoin(const String & dict_key, const Block & sample_block, Names & names, NamesAndTypesList & result_columns) const
{
/// Support ALL INNER, [ANY | ALL | SEMI | ANTI] LEFT
if (!isLeft(kind()) && !(isInner(kind()) && strictness() == ASTTableJoin::Strictness::All))
return false;
const Names & right_keys = keyNamesRight();
if (right_keys.size() != 1)
return false;

View File

@ -122,6 +122,8 @@ public:
size_t rightKeyInclusion(const String & name) const;
NameSet requiredRightKeys() const;
bool leftBecomeNullable(const DataTypePtr & column_type) const;
bool rightBecomeNullable(const DataTypePtr & column_type) const;
void addJoinedColumn(const NameAndTypePair & joined_column);
void addJoinedColumnsAndCorrectNullability(Block & sample_block) const;

View File

@ -1,5 +1,3 @@
SET any_join_distinct_right_table_keys = 1;
drop table IF EXISTS joinbug;
CREATE TABLE joinbug (
@ -21,7 +19,7 @@ CREATE TABLE joinbug_join (
val UInt64,
val2 Int32,
created UInt64
) ENGINE = Join(ANY, INNER, id2);
) ENGINE = Join(SEMI, LEFT, id2);
insert into joinbug_join (id, id2, val, val2, created)
select id, id2, val, val2, created
@ -36,7 +34,7 @@ select id, id2, val, val2, created
from (
SELECT toUInt64(arrayJoin(range(50))) AS id2
) js1
ANY INNER JOIN joinbug_join using id2;
SEMI LEFT JOIN joinbug_join using id2;
DROP TABLE joinbug;
DROP TABLE joinbug_join;

View File

@ -0,0 +1,103 @@
flat: left on
0 0 0 0 0
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
4 0 0 0
flat: left
0 0 0 0
1 1 1 1
2 2 2 2
3 3 3 3
4 0 0
flat: any left
0 0 0 0
1 1 1 1
2 2 2 2
3 3 3 3
4 0 0
flat: semi left
0 0 0 0
1 1 1 1
2 2 2 2
3 3 3 3
flat: anti left
4 0 0
flat: inner
0 0 0 0
1 1 1 1
flat: inner on
0 0 0 0 0
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
hashed: left on
0 0 0 0 0
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
4 \N \N \N \N
hashed: left
0 0 0 0
1 1 1 1
2 2 2 2
3 3 3 3
4 \N \N \N
hashed: any left
0 0 0 0
1 1 1 1
2 2 2 2
3 3 3 3
4 \N \N \N
hashed: semi left
0 0 0 0
1 1 1 1
2 2 2 2
3 3 3 3
hashed: anti left
4 \N \N \N
hashed: inner
0 0 0 0
1 1 1 1
hashed: inner on
0 0 0 0 0
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
complex_cache (smoke)
0 \N \N \N \N
1 \N \N \N \N
2 \N \N \N \N
3 \N \N \N \N
4 \N \N \N \N
not optimized (smoke)
0 0 0 0
1 1 1 1
2 2 2 2
3 3 3 3
-
0 0 0 0 0
1 1 1 1 1
\N 2 2 2 2
\N 3 3 3 3
-
2 2 2 2
3 3 3 3
4 \N \N \N
5 \N \N \N
\N 0 0 0
\N 1 1 1
-
0 0 0 0
1 1 1 1
-
0 0 0 0
1 1 1 1
3 3 3 3
2 2 2 2
-
0 0 0 0
1 1 1 1
-
3 3 3 3
2 2 2 2

View File

@ -0,0 +1,90 @@
-- Test script: JOINs of small generated key sets against dictionaries built over table t1.
-- Every SELECT '<label>' prints a marker line ahead of the result of the query that follows it.
SET send_logs_level = 'none';
-- Start from a clean, dedicated database.
DROP DATABASE IF EXISTS db_01115;
CREATE DATABASE db_01115 Engine = Ordinary;
USE db_01115;
DROP DICTIONARY IF EXISTS dict_flat;
DROP DICTIONARY IF EXISTS dict_hashed;
DROP DICTIONARY IF EXISTS dict_complex_cache;
-- Source table for all three dictionaries; filled from numbers(4).
CREATE TABLE t1 (key UInt64, a UInt8, b String, c Float64) ENGINE = MergeTree() ORDER BY key;
INSERT INTO t1 SELECT number, number, toString(number), number from numbers(4);
-- Single-key dictionary, FLAT layout.
CREATE DICTIONARY dict_flat (key UInt64 DEFAULT 0, a UInt8 DEFAULT 42, b String DEFAULT 'x', c Float64 DEFAULT 42.0)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 't1' PASSWORD '' DB 'db_01115'))
LIFETIME(MIN 1 MAX 10)
LAYOUT(FLAT());
-- Same attributes, HASHED layout; created with an explicit database prefix.
CREATE DICTIONARY db_01115.dict_hashed (key UInt64 DEFAULT 0, a UInt8 DEFAULT 42, b String DEFAULT 'x', c Float64 DEFAULT 42.0)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 't1' DB 'db_01115'))
LIFETIME(MIN 1 MAX 10)
LAYOUT(HASHED());
-- Two-column primary key (key, b), COMPLEX_KEY_CACHE layout with a single cell.
CREATE DICTIONARY dict_complex_cache (key UInt64 DEFAULT 0, a UInt8 DEFAULT 42, b String DEFAULT 'x', c Float64 DEFAULT 42.0)
PRIMARY KEY key, b
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 't1' DB 'db_01115'))
LIFETIME(MIN 1 MAX 10)
LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 1));
-- Part 1: dict_flat with join_use_nulls = 0.
SET join_use_nulls = 0;
SELECT 'flat: left on';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_flat d ON s1.key = d.key ORDER BY s1.key;
SELECT 'flat: left';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_flat d USING(key) ORDER BY key;
SELECT 'flat: any left';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANY LEFT JOIN dict_flat d USING(key) ORDER BY key;
SELECT 'flat: semi left';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 SEMI JOIN dict_flat d USING(key) ORDER BY key;
SELECT 'flat: anti left';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANTI JOIN dict_flat d USING(key) ORDER BY key;
SELECT 'flat: inner';
-- NOTE(review): no ORDER BY on this query; the row order of its output is presumably stable only because the input is tiny - confirm.
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 JOIN dict_flat d USING(key);
SELECT 'flat: inner on';
SELECT * FROM (SELECT number AS k FROM numbers(100)) s1 JOIN dict_flat d ON k = key ORDER BY k;
-- Part 2: the same set of queries against dict_hashed with join_use_nulls = 1.
SET join_use_nulls = 1;
SELECT 'hashed: left on';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_hashed d ON s1.key = d.key ORDER BY s1.key;
SELECT 'hashed: left';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_hashed d USING(key) ORDER BY key;
SELECT 'hashed: any left';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANY LEFT JOIN dict_hashed d USING(key) ORDER BY key;
SELECT 'hashed: semi left';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 SEMI JOIN dict_hashed d USING(key) ORDER BY key;
SELECT 'hashed: anti left';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANTI JOIN dict_hashed d USING(key) ORDER BY key;
SELECT 'hashed: inner';
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 JOIN dict_hashed d USING(key);
SELECT 'hashed: inner on';
SELECT * FROM (SELECT number AS k FROM numbers(100)) s1 JOIN dict_hashed d ON k = key ORDER BY k;
-- Part 3: smoke check with the complex-key dictionary; the join condition uses `key` only,
-- although the dictionary's primary key is (key, b).
SELECT 'complex_cache (smoke)';
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_complex_cache d ON s1.key = d.key ORDER BY s1.key;
-- Part 4: RIGHT, FULL, ANY INNER and ANY/SEMI/ANTI RIGHT joins against dict_flat.
-- NOTE(review): the 'not optimized' label presumably means these kinds do not take the
-- dictionary-specific join path - confirm against the join implementation.
SELECT 'not optimized (smoke)';
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 RIGHT JOIN dict_flat d USING(key) ORDER BY key;
SELECT '-';
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 RIGHT JOIN dict_flat d ON s1.key = d.key ORDER BY d.key;
SELECT '-';
SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s1 FULL JOIN dict_flat d USING(key) ORDER BY s1.key, d.key;
SELECT '-';
-- NOTE(review): the four queries below have no ORDER BY; their output row order is presumably
-- implementation-dependent - confirm that the reference file is stable.
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANY INNER JOIN dict_flat d USING(key);
SELECT '-';
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANY RIGHT JOIN dict_flat d USING(key);
SELECT '-';
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 SEMI RIGHT JOIN dict_flat d USING(key);
SELECT '-';
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANTI RIGHT JOIN dict_flat d USING(key);
-- Cleanup: dictionaries first, then the source table and the database.
DROP DICTIONARY dict_flat;
DROP DICTIONARY dict_hashed;
DROP DICTIONARY dict_complex_cache;
DROP TABLE t1;
DROP DATABASE IF EXISTS db_01115;

View File

@ -1,10 +1,7 @@
SET any_join_distinct_right_table_keys = 1;
USE test;
DROP TABLE IF EXISTS join;
CREATE TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(ANY, INNER, UserID)
SETTINGS any_join_distinct_right_table_keys = 1;
CREATE TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(SEMI, LEFT, UserID);
INSERT INTO join
SELECT
@ -20,17 +17,17 @@ HAVING (yandex + google) > 10;
SELECT
loyalty,
count()
FROM hits ANY INNER JOIN join USING UserID
FROM hits SEMI LEFT JOIN join USING UserID
GROUP BY loyalty
ORDER BY loyalty ASC;
DETACH TABLE join;
ATTACH TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(ANY, INNER, UserID);
ATTACH TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(SEMI, LEFT, UserID);
SELECT
loyalty,
count()
FROM hits ANY INNER JOIN join USING UserID
FROM hits SEMI LEFT JOIN join USING UserID
GROUP BY loyalty
ORDER BY loyalty ASC;