From d7a2f452999ceb3b812e5becc9ffc1a8a7dad983 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 17 Dec 2019 18:02:42 +0300 Subject: [PATCH] Fix crash in FULL JOIN with LowCard in key (attempt 2) --- dbms/src/Interpreters/Join.cpp | 24 ++++++++-------- dbms/src/Interpreters/join_common.cpp | 28 +++++++++++++++++-- dbms/src/Interpreters/join_common.h | 4 ++- .../0_stateless/01049_join_lc_crach.reference | 6 ++++ .../0_stateless/01049_join_lc_crach.sql | 15 ++++++++++ 5 files changed, 61 insertions(+), 16 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/01049_join_lc_crach.reference create mode 100644 dbms/tests/queries/0_stateless/01049_join_lc_crach.sql diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 43b792d70e7..4da687ac1e4 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -501,15 +501,12 @@ void Join::initRightBlockStructure() JoinCommon::convertColumnsToNullable(saved_block_sample, (isFull(kind) ? right_table_keys.columns() : 0)); } -Block Join::structureRightBlock(const Block & source_block) const +Block Join::structureRightBlock(const Block & block) const { - /// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them. - Block block = materializeBlock(source_block); - Block structured_block; for (auto & sample_column : saved_block_sample.getColumnsWithTypeAndName()) { - auto & column = block.getByName(sample_column.name); + ColumnWithTypeAndName column = block.getByName(sample_column.name); if (sample_column.column->isNullable()) JoinCommon::convertColumnToNullable(column); structured_block.insert(column); @@ -518,14 +515,16 @@ Block Join::structureRightBlock(const Block & source_block) const return structured_block; } -bool Join::addJoinedBlock(const Block & block) +bool Join::addJoinedBlock(const Block & source_block) { if (empty()) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); - /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. - Columns materialized_columns; - ColumnRawPtrs key_columns = JoinCommon::temporaryMaterializeColumns(block, key_names_right, materialized_columns); + /// There's no optimization for right side const columns. Remove constness if any. + Block block = materializeBlock(source_block); + size_t rows = block.rows(); + + ColumnRawPtrs key_columns = JoinCommon::materializeColumnsInplace(block, key_names_right); /// We will insert to the map only keys, where all components are not NULL. ConstNullMapPtr null_map{}; @@ -549,7 +548,6 @@ bool Join::addJoinedBlock(const Block & block) blocks.emplace_back(std::move(structured_block)); Block * stored_block = &blocks.back(); - size_t rows = block.rows(); if (rows) has_no_rows_in_maps = false; @@ -886,9 +884,9 @@ void Join::joinBlockImpl( constexpr bool need_replication = is_all_join || (is_any_join && right) || (is_semi_join && right); constexpr bool need_filter = !need_replication && (inner || right || (is_semi_join && left) || (is_anti_join && left)); - /// Rare case, when keys are constant. To avoid code bloat, simply materialize them. - Columns materialized_columns; - ColumnRawPtrs key_columns = JoinCommon::temporaryMaterializeColumns(block, key_names_left, materialized_columns); + /// Rare case, when keys are constant or low cardinality. To avoid code bloat, simply materialize them. + Columns materialized_keys = JoinCommon::materializeColumns(block, key_names_left); + ColumnRawPtrs key_columns = JoinCommon::getRawPointers(materialized_keys); /// Keys with NULL value in any column won't join to anything. ConstNullMapPtr null_map{}; diff --git a/dbms/src/Interpreters/join_common.cpp b/dbms/src/Interpreters/join_common.cpp index 852867f7775..bb61870daca 100644 --- a/dbms/src/Interpreters/join_common.cpp +++ b/dbms/src/Interpreters/join_common.cpp @@ -48,19 +48,43 @@ void removeColumnNullability(ColumnWithTypeAndName & column) } } -ColumnRawPtrs temporaryMaterializeColumns(const Block & block, const Names & names, Columns & materialized) +ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names) { ColumnRawPtrs ptrs; ptrs.reserve(names.size()); + + for (auto & column_name : names) + { + auto & column = block.getByName(column_name).column; + column = recursiveRemoveLowCardinality(column->convertToFullColumnIfConst()); + ptrs.push_back(column.get()); + } + + return ptrs; +} + +Columns materializeColumns(const Block & block, const Names & names) +{ + Columns materialized; materialized.reserve(names.size()); for (auto & column_name : names) { const auto & src_column = block.getByName(column_name).column; materialized.emplace_back(recursiveRemoveLowCardinality(src_column->convertToFullColumnIfConst())); - ptrs.push_back(materialized.back().get()); } + return materialized; +} + +ColumnRawPtrs getRawPointers(const Columns & columns) +{ + ColumnRawPtrs ptrs; + ptrs.reserve(columns.size()); + + for (auto & column : columns) + ptrs.push_back(column.get()); + return ptrs; } diff --git a/dbms/src/Interpreters/join_common.h b/dbms/src/Interpreters/join_common.h index 85c24515b41..7c5ec0dc693 100644 --- a/dbms/src/Interpreters/join_common.h +++ b/dbms/src/Interpreters/join_common.h @@ -16,7 +16,9 @@ namespace JoinCommon void convertColumnToNullable(ColumnWithTypeAndName & column); void convertColumnsToNullable(Block & block, size_t starting_pos = 0); void removeColumnNullability(ColumnWithTypeAndName & column); -ColumnRawPtrs temporaryMaterializeColumns(const Block & block, const Names & names, Columns & materialized); +Columns materializeColumns(const Block & block, const Names & names); +ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names); +ColumnRawPtrs getRawPointers(const Columns & columns); void removeLowCardinalityInplace(Block & block); /// Split key and other columns by keys name list diff --git a/dbms/tests/queries/0_stateless/01049_join_lc_crach.reference b/dbms/tests/queries/0_stateless/01049_join_lc_crach.reference new file mode 100644 index 00000000000..e7e860b7ce8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01049_join_lc_crach.reference @@ -0,0 +1,6 @@ +a 1 +b 0 +a 1 2 +b 0 3 +a 1 a 2 + 0 b 3 diff --git a/dbms/tests/queries/0_stateless/01049_join_lc_crach.sql b/dbms/tests/queries/0_stateless/01049_join_lc_crach.sql new file mode 100644 index 00000000000..f66b2610d85 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01049_join_lc_crach.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS Alpha; +DROP TABLE IF EXISTS Beta; + +CREATE TABLE Alpha (foo String, bar UInt64) ENGINE = Memory; +CREATE TABLE Beta (foo LowCardinality(String), baz UInt64) ENGINE = Memory; + +INSERT INTO Alpha VALUES ('a', 1); +INSERT INTO Beta VALUES ('a', 2), ('b', 3); + +SELECT * FROM Alpha FULL JOIN (SELECT 'b' as foo) USING (foo); +SELECT * FROM Alpha FULL JOIN Beta USING (foo); +SELECT * FROM Alpha FULL JOIN Beta ON Alpha.foo = Beta.foo; + +DROP TABLE Alpha; +DROP TABLE Beta;