From 29178e26daa4cb88690058145ae3a5ab5d724c7f Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Tue, 30 Jun 2020 14:13:43 +0300 Subject: [PATCH] fix low card types in merge join (#12035) --- src/Interpreters/MergeJoin.cpp | 5 +- src/Interpreters/join_common.cpp | 10 +++ src/Interpreters/join_common.h | 1 + ...00800_low_cardinality_merge_join.reference | 41 ++++++++++ .../00800_low_cardinality_merge_join.sql | 30 ++++++++ ...01353_low_cardinality_join_types.reference | 36 +++++++++ .../01353_low_cardinality_join_types.sql | 75 +++++++++++++++++++ 7 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/00800_low_cardinality_merge_join.reference create mode 100644 tests/queries/0_stateless/00800_low_cardinality_merge_join.sql create mode 100644 tests/queries/0_stateless/01353_low_cardinality_join_types.reference create mode 100644 tests/queries/0_stateless/01353_low_cardinality_join_types.sql diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index bb054169e71..1478c36dd23 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -396,7 +396,6 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right if (required_right_keys.count(column.name)) right_columns_to_add.insert(ColumnWithTypeAndName{nullptr, column.type, column.name}); - JoinCommon::removeLowCardinalityInplace(right_columns_to_add); JoinCommon::createMissedColumns(right_columns_to_add); if (nullable_right_side) @@ -513,7 +512,7 @@ bool MergeJoin::saveRightBlock(Block && block) bool MergeJoin::addJoinedBlock(const Block & src_block, bool) { Block block = materializeBlock(src_block); - JoinCommon::removeLowCardinalityInplace(block); + JoinCommon::removeLowCardinalityInplace(block, table_join->keyNamesRight()); sortBlock(block, right_sort_description); return saveRightBlock(std::move(block)); @@ -525,7 +524,7 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) { JoinCommon::checkTypesOfKeys(block, table_join->keyNamesLeft(), right_table_keys, table_join->keyNamesRight()); materializeBlockInplace(block); - JoinCommon::removeLowCardinalityInplace(block); + JoinCommon::removeLowCardinalityInplace(block, table_join->keyNamesLeft()); sortBlock(block, left_sort_description); } diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index 6dd3a202d4d..a17d3b43e69 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -104,6 +104,16 @@ void removeLowCardinalityInplace(Block & block) } } +void removeLowCardinalityInplace(Block & block, const Names & names) +{ + for (const String & column_name : names) + { + auto & col = block.getByName(column_name); + col.column = recursiveRemoveLowCardinality(col.column); + col.type = recursiveRemoveLowCardinality(col.type); + } +} + void splitAdditionalColumns(const Block & sample_block, const Names & key_names, Block & block_keys, Block & block_others) { block_others = materializeBlock(sample_block); diff --git a/src/Interpreters/join_common.h b/src/Interpreters/join_common.h index 47fa082e700..81eb0dfa688 100644 --- a/src/Interpreters/join_common.h +++ b/src/Interpreters/join_common.h @@ -20,6 +20,7 @@ Columns materializeColumns(const Block & block, const Names & names); ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names); ColumnRawPtrs getRawPointers(const Columns & columns); void removeLowCardinalityInplace(Block & block); +void removeLowCardinalityInplace(Block & block, const Names & names); /// Split key and other columns by keys name list void splitAdditionalColumns(const Block & sample_block, const Names & key_names, Block & block_keys, Block & block_others); diff --git a/tests/queries/0_stateless/00800_low_cardinality_merge_join.reference b/tests/queries/0_stateless/00800_low_cardinality_merge_join.reference new file mode 100644 index 00000000000..8e032c0a542 --- /dev/null +++ b/tests/queries/0_stateless/00800_low_cardinality_merge_join.reference @@ -0,0 +1,41 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +- +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +0 0 +- +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 0 +0 1 +1 2 +2 \N +0 1 +1 2 +2 \N diff --git a/tests/queries/0_stateless/00800_low_cardinality_merge_join.sql b/tests/queries/0_stateless/00800_low_cardinality_merge_join.sql new file mode 100644 index 00000000000..1181ee453a6 --- /dev/null +++ b/tests/queries/0_stateless/00800_low_cardinality_merge_join.sql @@ -0,0 +1,30 @@ +set join_algorithm = 'partial_merge'; + +select * from (select dummy as val from system.one) s1 any left join (select dummy as val from system.one) s2 using val; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select dummy as val from system.one) s2 using val; +select * from (select dummy as val from system.one) s1 any left join (select toLowCardinality(dummy) as val from system.one) s2 using val; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select toLowCardinality(dummy) as val from system.one) s2 using val; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select dummy as val from system.one) s2 using val; +select * from (select dummy as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) s2 using val; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select toLowCardinality(dummy) as val from system.one) s2 using val; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) s2 using val; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as val from system.one) s2 using val; +select '-'; +select * from (select dummy as val from system.one) s1 any left join (select dummy as val from system.one) s2 on val + 0 = val * 1; -- { serverError 352 } +select * from (select dummy as val from system.one) s1 any left join (select dummy as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select dummy as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select dummy as val from system.one) s1 any left join (select toLowCardinality(dummy) as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select toLowCardinality(dummy) as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select dummy as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select dummy as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select toLowCardinality(dummy) as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(dummy) as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as rval from system.one) s2 on val + 0 = rval * 1; +select * from (select toLowCardinality(toNullable(dummy)) as val from system.one) s1 any left join (select toLowCardinality(toNullable(dummy)) as rval from system.one) s2 on val + 0 = rval * 1; +select '-'; +select * from (select number as l from system.numbers limit 3) s1 any left join (select number as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) s1 any left join (select number as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select number as l from system.numbers limit 3) s1 any left join (select toLowCardinality(number) as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) s1 any left join (select toLowCardinality(number) as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select toLowCardinality(toNullable(number)) as l from system.numbers limit 3) s1 any left join (select toLowCardinality(number) as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select toLowCardinality(number) as l from system.numbers limit 3) s1 any left join (select toLowCardinality(toNullable(number)) as r from system.numbers limit 3) s2 on l + 1 = r * 1; +select * from (select toLowCardinality(toNullable(number)) as l from system.numbers limit 3) s1 any left join (select toLowCardinality(toNullable(number)) as r from system.numbers limit 3) s2 on l + 1 = r * 1; diff --git a/tests/queries/0_stateless/01353_low_cardinality_join_types.reference b/tests/queries/0_stateless/01353_low_cardinality_join_types.reference new file mode 100644 index 00000000000..85d3f3d598b --- /dev/null +++ b/tests/queries/0_stateless/01353_low_cardinality_join_types.reference @@ -0,0 +1,36 @@ +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +- +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +LowCardinality(UInt64) UInt64 String LowCardinality(String) +- +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +UInt64 LowCardinality(UInt64) LowCardinality(String) String +- +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) +LowCardinality(UInt64) LowCardinality(UInt64) LowCardinality(String) LowCardinality(String) diff --git a/tests/queries/0_stateless/01353_low_cardinality_join_types.sql b/tests/queries/0_stateless/01353_low_cardinality_join_types.sql new file mode 100644 index 00000000000..91ebe97fa48 --- /dev/null +++ b/tests/queries/0_stateless/01353_low_cardinality_join_types.sql @@ -0,0 +1,75 @@ +set join_algorithm = 'hash'; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +full join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +set join_algorithm = 'prefer_partial_merge'; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toString(number) s from numbers(2)) as js1 +full join (select number+1 k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select number k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toString(number+1) s from numbers(2)) as js2 +using k order by js1.k, js2.k; + +select '-'; +select toTypeName(materialize(js1.k)), toTypeName(materialize(js2.k)), toTypeName(materialize(js1.s)), toTypeName(materialize(js2.s)) +from (select toLowCardinality(number) k, toLowCardinality(toString(number)) s from numbers(2)) as js1 +full join (select toLowCardinality(number+1) k, toLowCardinality(toString(number+1)) s from numbers(2)) as js2 +using k order by js1.k, js2.k;