mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
working version
This commit is contained in:
parent
12ca9f5912
commit
0b7ae2c2bf
@ -295,17 +295,22 @@ public:
|
||||
: key_columns(key_columns_)
|
||||
{}
|
||||
|
||||
FindResult findKey(const DictionaryReader & reader, size_t i, const Arena &)
|
||||
FindResult findKey(const TableJoin & table_join, size_t row, const Arena &)
|
||||
{
|
||||
const DictionaryReader & reader = *table_join.dictionary_reader;
|
||||
if (!read_result)
|
||||
{
|
||||
reader.readKeys(*key_columns[0], key_columns[0]->size(), read_result, found, positions);
|
||||
result.block = &read_result;
|
||||
/// TODO: check types and correct nullability
|
||||
|
||||
if (table_join.forceNullableRight())
|
||||
for (auto & column : read_result)
|
||||
if (table_join.rightBecomeNullable(column.type))
|
||||
JoinCommon::convertColumnToNullable(column);
|
||||
}
|
||||
|
||||
result.row_num = positions[i];
|
||||
return FindResult(&result, found[i]);
|
||||
result.row_num = positions[row];
|
||||
return FindResult(&result, found[row]);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -985,14 +990,14 @@ IColumn::Filter switchJoinRightColumns(const Maps & maps_, AddedColumns & added_
|
||||
}
|
||||
|
||||
template <ASTTableJoin::Kind KIND, ASTTableJoin::Strictness STRICTNESS>
|
||||
IColumn::Filter dictionaryJoinRightColumns(const DictionaryReader & reader, AddedColumns & added_columns, const ConstNullMapPtr & null_map)
|
||||
IColumn::Filter dictionaryJoinRightColumns(const TableJoin & table_join, AddedColumns & added_columns, const ConstNullMapPtr & null_map)
|
||||
{
|
||||
if constexpr (KIND == ASTTableJoin::Kind::Left &&
|
||||
(STRICTNESS == ASTTableJoin::Strictness::Any ||
|
||||
STRICTNESS == ASTTableJoin::Strictness::Semi ||
|
||||
STRICTNESS == ASTTableJoin::Strictness::Anti))
|
||||
{
|
||||
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetterForDict>(reader, added_columns, null_map);
|
||||
return joinRightColumnsSwitchNullability<KIND, STRICTNESS, KeyGetterForDict>(table_join, added_columns, null_map);
|
||||
}
|
||||
|
||||
throw Exception("Logical error: wrong JOIN combination", ErrorCodes::LOGICAL_ERROR);
|
||||
@ -1059,7 +1064,7 @@ void HashJoin::joinBlockImpl(
|
||||
added_columns.need_filter = need_filter || has_required_right_keys;
|
||||
|
||||
IColumn::Filter row_filter = overDictionary() ?
|
||||
dictionaryJoinRightColumns<KIND, STRICTNESS>(*table_join->dictionary_reader, added_columns, null_map) :
|
||||
dictionaryJoinRightColumns<KIND, STRICTNESS>(*table_join, added_columns, null_map) :
|
||||
switchJoinRightColumns<KIND, STRICTNESS>(maps_, added_columns, data->type, null_map);
|
||||
|
||||
for (size_t i = 0; i < added_columns.size(); ++i)
|
||||
|
@ -155,22 +155,26 @@ NamesWithAliases TableJoin::getRequiredColumns(const Block & sample, const Names
|
||||
return getNamesWithAliases(required_columns);
|
||||
}
|
||||
|
||||
/// Tells whether a left-side column of type `column_type` should have its type wrapped in Nullable.
/// Returns true only if forceNullableLeft() holds and the type reports canBeInsideNullable().
/// NOTE(review): forceNullableLeft() is not visible here; presumably it is join_use_nulls combined
/// with a RIGHT or FULL join kind - confirm against its definition.
bool TableJoin::leftBecomeNullable(const DataTypePtr & column_type) const
|
||||
{
|
||||
return forceNullableLeft() && column_type->canBeInsideNullable();
|
||||
}
|
||||
|
||||
/// Tells whether a right-side (joined) column of type `column_type` should have its type wrapped in Nullable.
/// Returns true only if forceNullableRight() holds and the type reports canBeInsideNullable().
/// NOTE(review): forceNullableRight() is not visible here; presumably it is join_use_nulls combined
/// with a LEFT or FULL join kind - confirm against its definition.
bool TableJoin::rightBecomeNullable(const DataTypePtr & column_type) const
|
||||
{
|
||||
return forceNullableRight() && column_type->canBeInsideNullable();
|
||||
}
|
||||
|
||||
void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column)
|
||||
{
|
||||
if (join_use_nulls && isLeftOrFull(table_join.kind))
|
||||
{
|
||||
auto type = joined_column.type->canBeInsideNullable() ? makeNullable(joined_column.type) : joined_column.type;
|
||||
columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, std::move(type)));
|
||||
}
|
||||
if (rightBecomeNullable(joined_column.type))
|
||||
columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, makeNullable(joined_column.type)));
|
||||
else
|
||||
columns_added_by_join.push_back(joined_column);
|
||||
}
|
||||
|
||||
void TableJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) const
|
||||
{
|
||||
bool right_or_full_join = isRightOrFull(table_join.kind);
|
||||
bool left_or_full_join = isLeftOrFull(table_join.kind);
|
||||
|
||||
for (auto & col : sample_block)
|
||||
{
|
||||
/// Materialize column.
|
||||
@ -179,9 +183,7 @@ void TableJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) cons
|
||||
if (col.column)
|
||||
col.column = nullptr;
|
||||
|
||||
bool make_nullable = join_use_nulls && right_or_full_join;
|
||||
|
||||
if (make_nullable && col.type->canBeInsideNullable())
|
||||
if (leftBecomeNullable(col.type))
|
||||
col.type = makeNullable(col.type);
|
||||
}
|
||||
|
||||
@ -189,9 +191,7 @@ void TableJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) cons
|
||||
{
|
||||
auto res_type = col.type;
|
||||
|
||||
bool make_nullable = join_use_nulls && left_or_full_join;
|
||||
|
||||
if (make_nullable && res_type->canBeInsideNullable())
|
||||
if (rightBecomeNullable(res_type))
|
||||
res_type = makeNullable(res_type);
|
||||
|
||||
sample_block.insert(ColumnWithTypeAndName(nullptr, res_type, col.name));
|
||||
@ -240,6 +240,10 @@ bool TableJoin::allowMergeJoin() const
|
||||
|
||||
bool TableJoin::allowDictJoin(const String & dict_key, const Block & sample_block, Names & names, NamesAndTypesList & result_columns) const
|
||||
{
|
||||
/// Support ALL INNER, [ANY | ALL | SEMI | ANTI] LEFT
|
||||
if (!isLeft(kind()) && !(isInner(kind()) && strictness() == ASTTableJoin::Strictness::All))
|
||||
return false;
|
||||
|
||||
const Names & right_keys = keyNamesRight();
|
||||
if (right_keys.size() != 1)
|
||||
return false;
|
||||
|
@ -122,6 +122,8 @@ public:
|
||||
size_t rightKeyInclusion(const String & name) const;
|
||||
NameSet requiredRightKeys() const;
|
||||
|
||||
/// True if a left-side column of the given type must be converted to Nullable for this join
/// (forceNullableLeft() holds and the type can be inside Nullable).
bool leftBecomeNullable(const DataTypePtr & column_type) const;
|
||||
/// True if a right-side column of the given type must be converted to Nullable for this join
/// (forceNullableRight() holds and the type can be inside Nullable).
bool rightBecomeNullable(const DataTypePtr & column_type) const;
|
||||
void addJoinedColumn(const NameAndTypePair & joined_column);
|
||||
void addJoinedColumnsAndCorrectNullability(Block & sample_block) const;
|
||||
|
||||
|
@ -1,5 +1,3 @@
|
||||
SET any_join_distinct_right_table_keys = 1;
|
||||
|
||||
drop table IF EXISTS joinbug;
|
||||
|
||||
CREATE TABLE joinbug (
|
||||
@ -21,7 +19,7 @@ CREATE TABLE joinbug_join (
|
||||
val UInt64,
|
||||
val2 Int32,
|
||||
created UInt64
|
||||
) ENGINE = Join(ANY, INNER, id2);
|
||||
) ENGINE = Join(SEMI, LEFT, id2);
|
||||
|
||||
insert into joinbug_join (id, id2, val, val2, created)
|
||||
select id, id2, val, val2, created
|
||||
@ -36,7 +34,7 @@ select id, id2, val, val2, created
|
||||
from (
|
||||
SELECT toUInt64(arrayJoin(range(50))) AS id2
|
||||
) js1
|
||||
ANY INNER JOIN joinbug_join using id2;
|
||||
SEMI LEFT JOIN joinbug_join using id2;
|
||||
|
||||
DROP TABLE joinbug;
|
||||
DROP TABLE joinbug_join;
|
||||
|
103
tests/queries/0_stateless/01115_join_with_dictionary.reference
Normal file
103
tests/queries/0_stateless/01115_join_with_dictionary.reference
Normal file
@ -0,0 +1,103 @@
|
||||
flat: left on
|
||||
0 0 0 0 0
|
||||
1 1 1 1 1
|
||||
2 2 2 2 2
|
||||
3 3 3 3 3
|
||||
4 0 0 0
|
||||
flat: left
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
2 2 2 2
|
||||
3 3 3 3
|
||||
4 0 0
|
||||
flat: any left
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
2 2 2 2
|
||||
3 3 3 3
|
||||
4 0 0
|
||||
flat: semi left
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
2 2 2 2
|
||||
3 3 3 3
|
||||
flat: anti left
|
||||
4 0 0
|
||||
flat: inner
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
flat: inner on
|
||||
0 0 0 0 0
|
||||
1 1 1 1 1
|
||||
2 2 2 2 2
|
||||
3 3 3 3 3
|
||||
hashed: left on
|
||||
0 0 0 0 0
|
||||
1 1 1 1 1
|
||||
2 2 2 2 2
|
||||
3 3 3 3 3
|
||||
4 \N \N \N \N
|
||||
hashed: left
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
2 2 2 2
|
||||
3 3 3 3
|
||||
4 \N \N \N
|
||||
hashed: any left
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
2 2 2 2
|
||||
3 3 3 3
|
||||
4 \N \N \N
|
||||
hashed: semi left
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
2 2 2 2
|
||||
3 3 3 3
|
||||
hashed: anti left
|
||||
4 \N \N \N
|
||||
hashed: inner
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
hashed: inner on
|
||||
0 0 0 0 0
|
||||
1 1 1 1 1
|
||||
2 2 2 2 2
|
||||
3 3 3 3 3
|
||||
complex_cache (smoke)
|
||||
0 \N \N \N \N
|
||||
1 \N \N \N \N
|
||||
2 \N \N \N \N
|
||||
3 \N \N \N \N
|
||||
4 \N \N \N \N
|
||||
not optimized (smoke)
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
2 2 2 2
|
||||
3 3 3 3
|
||||
-
|
||||
0 0 0 0 0
|
||||
1 1 1 1 1
|
||||
\N 2 2 2 2
|
||||
\N 3 3 3 3
|
||||
-
|
||||
2 2 2 2
|
||||
3 3 3 3
|
||||
4 \N \N \N
|
||||
5 \N \N \N
|
||||
\N 0 0 0
|
||||
\N 1 1 1
|
||||
-
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
-
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
3 3 3 3
|
||||
2 2 2 2
|
||||
-
|
||||
0 0 0 0
|
||||
1 1 1 1
|
||||
-
|
||||
3 3 3 3
|
||||
2 2 2 2
|
90
tests/queries/0_stateless/01115_join_with_dictionary.sql
Normal file
90
tests/queries/0_stateless/01115_join_with_dictionary.sql
Normal file
@ -0,0 +1,90 @@
|
||||
-- Test setup: a dedicated database, one source table and three dictionaries built on top of it.
-- Do not send server log messages to the client.
SET send_logs_level = 'none';
|
||||
|
||||
-- Recreate the database used by this test from scratch.
DROP DATABASE IF EXISTS db_01115;
|
||||
CREATE DATABASE db_01115 Engine = Ordinary;
|
||||
|
||||
USE db_01115;
|
||||
|
||||
DROP DICTIONARY IF EXISTS dict_flat;
|
||||
DROP DICTIONARY IF EXISTS dict_hashed;
|
||||
DROP DICTIONARY IF EXISTS dict_complex_cache;
|
||||
|
||||
-- Source table for all dictionaries: 4 rows, key = 0..3, with a = key, b = toString(key), c = key.
CREATE TABLE t1 (key UInt64, a UInt8, b String, c Float64) ENGINE = MergeTree() ORDER BY key;
|
||||
INSERT INTO t1 SELECT number, number, toString(number), number from numbers(4);
|
||||
|
||||
-- Single-key dictionary over t1 with FLAT layout (this one passes an explicit empty PASSWORD).
CREATE DICTIONARY dict_flat (key UInt64 DEFAULT 0, a UInt8 DEFAULT 42, b String DEFAULT 'x', c Float64 DEFAULT 42.0)
|
||||
PRIMARY KEY key
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 't1' PASSWORD '' DB 'db_01115'))
|
||||
LIFETIME(MIN 1 MAX 10)
|
||||
LAYOUT(FLAT());
|
||||
|
||||
-- Same attributes and source with HASHED layout; created with a database-qualified name.
CREATE DICTIONARY db_01115.dict_hashed (key UInt64 DEFAULT 0, a UInt8 DEFAULT 42, b String DEFAULT 'x', c Float64 DEFAULT 42.0)
|
||||
PRIMARY KEY key
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 't1' DB 'db_01115'))
|
||||
LIFETIME(MIN 1 MAX 10)
|
||||
LAYOUT(HASHED());
|
||||
|
||||
-- Composite-key (key, b) dictionary with COMPLEX_KEY_CACHE layout limited to one cell.
CREATE DICTIONARY dict_complex_cache (key UInt64 DEFAULT 0, a UInt8 DEFAULT 42, b String DEFAULT 'x', c Float64 DEFAULT 42.0)
|
||||
PRIMARY KEY key, b
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 't1' DB 'db_01115'))
|
||||
LIFETIME(MIN 1 MAX 10)
|
||||
LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 1));
|
||||
|
||||
-- Joins against dict_flat with join_use_nulls = 0.
-- The left side of the LEFT/SEMI/ANTI queries is 0..4 while t1 holds keys 0..3, so key 4 has no match.
SET join_use_nulls = 0;
|
||||
|
||||
-- Each query is preceded by a label row that also appears in the .reference output.
SELECT 'flat: left on';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_flat d ON s1.key = d.key ORDER BY s1.key;
|
||||
SELECT 'flat: left';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_flat d USING(key) ORDER BY key;
|
||||
SELECT 'flat: any left';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANY LEFT JOIN dict_flat d USING(key) ORDER BY key;
|
||||
SELECT 'flat: semi left';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 SEMI JOIN dict_flat d USING(key) ORDER BY key;
|
||||
SELECT 'flat: anti left';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANTI JOIN dict_flat d USING(key) ORDER BY key;
|
||||
-- Inner joins: the first uses only keys 0..1 on the left and has no ORDER BY.
SELECT 'flat: inner';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 JOIN dict_flat d USING(key);
|
||||
-- Left side is 0..99 and the key columns have different names (k vs key).
SELECT 'flat: inner on';
|
||||
SELECT * FROM (SELECT number AS k FROM numbers(100)) s1 JOIN dict_flat d ON k = key ORDER BY k;
|
||||
|
||||
-- The same set of joins against dict_hashed, now with join_use_nulls = 1.
-- The left side of the LEFT/SEMI/ANTI queries is 0..4 while t1 holds keys 0..3, so key 4 has no match.
SET join_use_nulls = 1;
|
||||
|
||||
-- Each query is preceded by a label row that also appears in the .reference output.
SELECT 'hashed: left on';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_hashed d ON s1.key = d.key ORDER BY s1.key;
|
||||
SELECT 'hashed: left';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_hashed d USING(key) ORDER BY key;
|
||||
SELECT 'hashed: any left';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANY LEFT JOIN dict_hashed d USING(key) ORDER BY key;
|
||||
SELECT 'hashed: semi left';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 SEMI JOIN dict_hashed d USING(key) ORDER BY key;
|
||||
SELECT 'hashed: anti left';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 ANTI JOIN dict_hashed d USING(key) ORDER BY key;
|
||||
-- Inner joins: the first uses only keys 0..1 on the left and has no ORDER BY.
SELECT 'hashed: inner';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 JOIN dict_hashed d USING(key);
|
||||
-- Left side is 0..99 and the key columns have different names (k vs key).
SELECT 'hashed: inner on';
|
||||
SELECT * FROM (SELECT number AS k FROM numbers(100)) s1 JOIN dict_hashed d ON k = key ORDER BY k;
|
||||
|
||||
-- Smoke tests; they run with join_use_nulls = 1, which was set earlier in this script.
-- LEFT JOIN on a single column against the composite-key cache dictionary.
SELECT 'complex_cache (smoke)';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(5)) s1 LEFT JOIN dict_complex_cache d ON s1.key = d.key ORDER BY s1.key;
|
||||
|
||||
-- RIGHT, FULL, ANY INNER and ANY/SEMI/ANTI RIGHT joins with dict_flat on the right side.
-- NOTE(review): going by the label, these kinds presumably do not take the dictionary-join path - confirm against TableJoin::allowDictJoin.
SELECT 'not optimized (smoke)';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 RIGHT JOIN dict_flat d USING(key) ORDER BY key;
|
||||
-- The '-' rows only separate the outputs of the following queries.
SELECT '-';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 RIGHT JOIN dict_flat d ON s1.key = d.key ORDER BY d.key;
|
||||
SELECT '-';
|
||||
-- Left side is 2..5 here, so it overlaps the dictionary keys 0..3 only on 2 and 3.
SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s1 FULL JOIN dict_flat d USING(key) ORDER BY s1.key, d.key;
|
||||
SELECT '-';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANY INNER JOIN dict_flat d USING(key);
|
||||
SELECT '-';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANY RIGHT JOIN dict_flat d USING(key);
|
||||
SELECT '-';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 SEMI RIGHT JOIN dict_flat d USING(key);
|
||||
SELECT '-';
|
||||
SELECT * FROM (SELECT number AS key FROM numbers(2)) s1 ANTI RIGHT JOIN dict_flat d USING(key);
|
||||
|
||||
-- Teardown: drop the dictionaries first, then their source table, then the test database.
DROP DICTIONARY dict_flat;
|
||||
DROP DICTIONARY dict_hashed;
|
||||
DROP DICTIONARY dict_complex_cache;
|
||||
|
||||
DROP TABLE t1;
|
||||
DROP DATABASE IF EXISTS db_01115;
|
@ -1,10 +1,7 @@
|
||||
SET any_join_distinct_right_table_keys = 1;
|
||||
|
||||
USE test;
|
||||
|
||||
DROP TABLE IF EXISTS join;
|
||||
CREATE TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(ANY, INNER, UserID)
|
||||
SETTINGS any_join_distinct_right_table_keys = 1;
|
||||
CREATE TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(SEMI, LEFT, UserID);
|
||||
|
||||
INSERT INTO join
|
||||
SELECT
|
||||
@ -20,17 +17,17 @@ HAVING (yandex + google) > 10;
|
||||
SELECT
|
||||
loyalty,
|
||||
count()
|
||||
FROM hits ANY INNER JOIN join USING UserID
|
||||
FROM hits SEMI LEFT JOIN join USING UserID
|
||||
GROUP BY loyalty
|
||||
ORDER BY loyalty ASC;
|
||||
|
||||
DETACH TABLE join;
|
||||
ATTACH TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(ANY, INNER, UserID);
|
||||
ATTACH TABLE join (UserID UInt64, loyalty Int8) ENGINE = Join(SEMI, LEFT, UserID);
|
||||
|
||||
SELECT
|
||||
loyalty,
|
||||
count()
|
||||
FROM hits ANY INNER JOIN join USING UserID
|
||||
FROM hits SEMI LEFT JOIN join USING UserID
|
||||
GROUP BY loyalty
|
||||
ORDER BY loyalty ASC;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user