From 6bdbf75e3033adc94105750236df397e5ec5f2d7 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 29 Mar 2019 21:07:22 +0300 Subject: [PATCH] fix crash when joining nulluble vs not nullable --- dbms/src/Interpreters/Join.cpp | 64 +++++++++++++++++-- .../00853_join_with_nulls_crash.reference | 16 +++++ .../00853_join_with_nulls_crash.sql | 57 +++++++++++++++++ 3 files changed, 132 insertions(+), 5 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.reference create mode 100644 dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.sql diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index ddc2173d75b..3bc68c2bc06 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -1236,8 +1236,12 @@ public: makeResultSampleBlock(left_sample_block, right_sample_block, columns_added_by_join, key_positions_left, is_left_key, left_to_right_key_map); + auto nullability_changes = getNullabilityChanges(parent.sample_block_with_keys, result_sample_block, + key_positions_left, left_to_right_key_map); + column_indices_left.reserve(left_sample_block.columns() - key_names_left.size()); column_indices_keys_and_right.reserve(key_names_left.size() + right_sample_block.columns()); + key_nullability_changes.reserve(key_positions_left.size()); /// Use right key columns if present. @note left & right key columns could have different nullability. for (size_t key_pos : key_positions_left) @@ -1250,11 +1254,12 @@ public: auto it = left_to_right_key_map.find(key_pos); if (it != left_to_right_key_map.end()) { - column_indices_keys_and_right.push_back(it->second); column_indices_left.push_back(key_pos); + key_pos = it->second; } - else - column_indices_keys_and_right.push_back(key_pos); + + column_indices_keys_and_right.push_back(key_pos); + key_nullability_changes.push_back(nullability_changes.count(key_pos)); } for (size_t i = 0; i < left_sample_block.columns(); ++i) @@ -1281,7 +1286,7 @@ protected: if (parent.dispatch([&](auto, auto strictness, auto & map) { block = createBlock(map); })) ; else - throw Exception("Logical error: unknown JOIN strictness (must be ANY or ALL)", ErrorCodes::LOGICAL_ERROR); + throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR); return block; } @@ -1290,11 +1295,13 @@ private: UInt64 max_block_size; Block result_sample_block; - /// Indices of columns in result_sample_block that come from the left-side table (except key columns). + /// Indices of columns in result_sample_block that come from the left-side table (except shared right+left key columns). ColumnNumbers column_indices_left; /// Indices of key columns in result_sample_block or columns that come from the right-side table. /// Order is significant: it is the same as the order of columns in the blocks of the right-side table that are saved in parent.blocks. ColumnNumbers column_indices_keys_and_right; + /// Which key columns need change nullability (right is nullable and left is not or vice versa) + std::vector key_nullability_changes; std::unique_ptr> position; /// type erasure @@ -1350,6 +1357,9 @@ private: MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left); MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right); + /// Temporary change destination key columns' nullability according to mapped block + changeNullability(columns_keys_and_right, key_nullability_changes); + size_t rows_added = 0; switch (parent.type) @@ -1368,6 +1378,9 @@ private: if (!rows_added) return {}; + /// Revert columns nullability + changeNullability(columns_keys_and_right, key_nullability_changes); + Block res = result_sample_block.cloneEmpty(); /// @note it's possible to make ColumnConst here and materialize it later @@ -1425,6 +1438,47 @@ private: return rows_added; } + + static std::unordered_set getNullabilityChanges(const Block & sample_block_with_keys, const Block & out_block, + const std::vector & key_positions, + const std::unordered_map & left_to_right_key_map) + { + std::unordered_set nullability_changes; + + for (size_t i = 0; i < key_positions.size(); ++i) + { + size_t key_pos = key_positions[i]; + + auto it = left_to_right_key_map.find(key_pos); + if (it != left_to_right_key_map.end()) + key_pos = it->second; + + const auto & dst = out_block.getByPosition(key_pos).column; + const auto & src = sample_block_with_keys.getByPosition(i).column; + if (dst->isColumnNullable() != src->isColumnNullable()) + nullability_changes.insert(key_pos); + } + + return nullability_changes; + } + + static void changeNullability(MutableColumns & columns, const std::vector & changes_bitmap) + { + /// @note changes_bitmap.size() <= columns.size() + for (size_t i = 0; i < changes_bitmap.size(); ++i) + { + if (changes_bitmap[i]) + { + ColumnPtr column = std::move(columns[i]); + if (column->isColumnNullable()) + column = static_cast(*column).getNestedColumnPtr(); + else + column = makeNullable(column); + + columns[i] = (*std::move(column)).mutate(); + } + } + } }; diff --git a/dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.reference b/dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.reference new file mode 100644 index 00000000000..7ee48046f3a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.reference @@ -0,0 +1,16 @@ +foo \N 2 0 Nullable(String) Nullable(String) +bar bar 1 2 Nullable(String) Nullable(String) +\N 0 1 Nullable(String) Nullable(String) +\N test 0 1 Nullable(String) Nullable(String) +foo \N 2 0 Nullable(String) Nullable(String) +bar bar 1 2 Nullable(String) Nullable(String) +\N 0 1 Nullable(String) Nullable(String) +\N test 0 1 Nullable(String) Nullable(String) +foo 2 0 String Nullable(String) +bar bar 1 2 String Nullable(String) + test 0 1 String Nullable(String) +bar bar 1 2 String Nullable(String) + test 0 1 String Nullable(String) +foo 2 0 String +bar 1 2 String +test 0 1 String diff --git a/dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.sql b/dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.sql new file mode 100644 index 00000000000..3fe4b5a1f0b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.sql @@ -0,0 +1,57 @@ +USE test; + +DROP TABLE IF EXISTS table_a; +DROP TABLE IF EXISTS table_b; + +CREATE TABLE table_a ( + event_id UInt64, + something String, + other Nullable(String) +) ENGINE = MergeTree ORDER BY (event_id); + +CREATE TABLE table_b ( + event_id UInt64, + something Nullable(String), + other String +) ENGINE = MergeTree ORDER BY (event_id); + +INSERT INTO table_a VALUES (1, 'foo', 'foo'), (2, 'foo', 'foo'), (3, 'bar', 'bar'); +INSERT INTO table_b VALUES (1, 'bar', 'bar'), (2, 'bar', 'bar'), (3, 'test', 'test'), (4, NULL, ''); + +SELECT s1.other, s2.other, count_a, count_b, toTypeName(s1.other), toTypeName(s2.other) FROM + ( SELECT other, count() AS count_a FROM table_a GROUP BY other ) s1 +ALL FULL JOIN + ( SELECT other, count() AS count_b FROM table_b GROUP BY other ) s2 +ON s1.other = s2.other +ORDER BY count_a DESC; + +SELECT s1.other, s2.other, count_a, count_b, toTypeName(s1.other), toTypeName(s2.other) FROM + ( SELECT other, count() AS count_a FROM table_a GROUP BY other ) s1 +ALL FULL JOIN + ( SELECT other, count() AS count_b FROM table_b GROUP BY other ) s2 +USING other +ORDER BY count_a DESC; + +SELECT s1.something, s2.something, count_a, count_b, toTypeName(s1.something), toTypeName(s2.something) FROM + ( SELECT something, count() AS count_a FROM table_a GROUP BY something ) s1 +ALL FULL JOIN + ( SELECT something, count() AS count_b FROM table_b GROUP BY something ) s2 +ON s1.something = s2.something +ORDER BY count_a DESC; + +SELECT s1.something, s2.something, count_a, count_b, toTypeName(s1.something), toTypeName(s2.something) FROM + ( SELECT something, count() AS count_a FROM table_a GROUP BY something ) s1 +ALL RIGHT JOIN + ( SELECT something, count() AS count_b FROM table_b GROUP BY something ) s2 +USING (something) +ORDER BY count_a DESC; + +SELECT something, count_a, count_b, toTypeName(something) FROM + ( SELECT something, count() AS count_a FROM table_a GROUP BY something ) +ALL FULL JOIN + ( SELECT something, count() AS count_b FROM table_b GROUP BY something ) +USING (something) +ORDER BY count_a DESC; + +DROP TABLE table_a; +DROP TABLE table_b;