From 73d6784c5999b010122ece59e0cf6b008d0d2d16 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 3 Jul 2019 22:06:34 +0300 Subject: [PATCH 1/4] join right table nulls --- dbms/src/Common/ErasedType.h | 24 ++++ dbms/src/Interpreters/Join.cpp | 111 ++++++++++++------ dbms/src/Interpreters/Join.h | 7 +- dbms/src/Interpreters/NullableUtils.cpp | 7 +- dbms/src/Interpreters/NullableUtils.h | 4 +- dbms/src/Interpreters/Set.cpp | 9 +- .../00875_join_right_nulls.reference | 22 ++++ .../0_stateless/00875_join_right_nulls.sql | 28 +++++ 8 files changed, 168 insertions(+), 44 deletions(-) create mode 100644 dbms/src/Common/ErasedType.h create mode 100644 dbms/tests/queries/0_stateless/00875_join_right_nulls.reference create mode 100644 dbms/tests/queries/0_stateless/00875_join_right_nulls.sql diff --git a/dbms/src/Common/ErasedType.h b/dbms/src/Common/ErasedType.h new file mode 100644 index 00000000000..7f4c7ce0fbd --- /dev/null +++ b/dbms/src/Common/ErasedType.h @@ -0,0 +1,24 @@ +#pragma once +#include + +namespace DB +{ + +struct ErasedType +{ + using Ptr = std::unique_ptr>; + + template + static Ptr create(const Args & ... args) + { + return Ptr(static_cast(new T(args...)), [](void * ptr) { delete reinterpret_cast(ptr); }); + } + + template + static T & get(Ptr & ptr) + { + return *reinterpret_cast(ptr.get()); + } +}; + +} diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 0c55d20d760..43b63cee251 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -16,6 +16,7 @@ #include #include +#include #include @@ -531,9 +532,8 @@ bool Join::insertFromBlock(const Block & block) } /// We will insert to the map only keys, where all components are not NULL. - ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); size_t rows = block.rows(); @@ -565,6 +565,10 @@ bool Join::insertFromBlock(const Block & block) }); } + /// If RIGHT or FULL save blocks with nulls for NonJoinedBlockInputStream + if (isRightOrFull(kind) && null_map) + blocks_nullmaps.emplace_back(stored_block, null_map_holder); + return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); } @@ -797,9 +801,8 @@ void Join::joinBlockImpl( } /// Keys with NULL value in any column won't join to anything. - ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); size_t existing_columns = block.columns(); @@ -1167,7 +1170,9 @@ class NonJoinedBlockInputStream : public IBlockInputStream public: NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size_) - : parent(parent_), max_block_size(max_block_size_) + : parent(parent_) + , max_block_size(max_block_size_) + , nulls_it(dirtyIterator()) { /** left_sample_block contains keys and "left" columns. * result_sample_block - keys, "left" columns, and "right" columns. @@ -1235,12 +1240,7 @@ protected: { if (parent.blocks.empty()) return Block(); - - Block block; - if (!joinDispatch(parent.kind, parent.strictness, parent.maps, - [&](auto, auto strictness, auto & map) { block = createBlock(map); })) - throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR); - return block; + return createBlock(); } private: @@ -1256,8 +1256,14 @@ private: /// Which key columns need change nullability (right is nullable and left is not or vice versa) std::vector key_nullability_changes; - std::unique_ptr> position; /// type erasure + ErasedType::Ptr position; + Join::BlockNullmapList::const_iterator nulls_it; + static Join::BlockNullmapList::const_iterator dirtyIterator() + { + static const Join::BlockNullmapList dirty{}; + return dirty.end(); + } void makeResultSampleBlock(const Block & left_sample_block, const Block & right_sample_block, const NamesAndTypesList & columns_added_by_join, @@ -1304,8 +1310,7 @@ private: } } - template - Block createBlock(const Maps & maps) + Block createBlock() { MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left); MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right); @@ -1315,25 +1320,22 @@ private: size_t rows_added = 0; - switch (parent.type) + auto fill_callback = [&](auto, auto strictness, auto & map) { - #define M(TYPE) \ - case Join::Type::TYPE: \ - rows_added = fillColumns(*maps.TYPE, columns_keys_and_right); \ - break; - APPLY_FOR_JOIN_VARIANTS(M) - #undef M + rows_added = fillColumnsFromMap(map, columns_keys_and_right); + }; - default: - throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); - } - - if (!rows_added) - return {}; + if (!joinDispatch(parent.kind, parent.strictness, parent.maps, fill_callback)) + throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR); /// Revert columns nullability changeNullability(columns_keys_and_right, key_nullability_changes); + fillNullsFromBlocks(columns_keys_and_right, rows_added); + + if (!rows_added) + return {}; + Block res = result_sample_block.cloneEmpty(); /// @note it's possible to make ColumnConst here and materialize it later @@ -1362,25 +1364,44 @@ private: return columns; } + template + size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right) + { + switch (parent.type) + { + #define M(TYPE) \ + case Join::Type::TYPE: \ + return fillColumns(*maps.TYPE, columns_keys_and_right); + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + default: + throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); + } + + __builtin_unreachable(); + } + template size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right) { + using Mapped = typename Map::mapped_type; + using Iterator = typename Map::const_iterator; + size_t rows_added = 0; if (!position) - position = decltype(position)( - static_cast(new typename Map::const_iterator(map.begin())), //-V572 - [](void * ptr) { delete reinterpret_cast(ptr); }); + position = ErasedType::create(map.begin()); - auto & it = *reinterpret_cast(position.get()); + Iterator & it = ErasedType::get(position); auto end = map.end(); for (; it != end; ++it) { - if (it->getSecond().getUsed()) + const Mapped & mapped = it->getSecond(); + if (mapped.getUsed()) continue; - AdderNonJoined::add(it->getSecond(), rows_added, columns_keys_and_right); + AdderNonJoined::add(mapped, rows_added, columns_keys_and_right); if (rows_added >= max_block_size) { @@ -1392,6 +1413,30 @@ private: return rows_added; } + void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added) + { + if (nulls_it == dirtyIterator()) + nulls_it = parent.blocks_nullmaps.begin(); + + auto end = parent.blocks_nullmaps.end(); + + for (; nulls_it != end && rows_added < max_block_size; ++nulls_it) + { + const Block * block = nulls_it->first; + const NullMap & nullmap = static_cast(*nulls_it->second).getData(); + + for (size_t row = 0; row < nullmap.size(); ++row) + { + if (nullmap[row]) + { + for (size_t col = 0; col < columns_keys_and_right.size(); ++col) + columns_keys_and_right[col]->insertFrom(*block->getByPosition(col).column, row); + ++rows_added; + } + } + } + } + static std::unordered_set getNullabilityChanges(const Block & sample_block_with_keys, const Block & out_block, const std::vector & key_positions, const std::unordered_map & left_to_right_key_map) diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 6105edc68fe..850769a7c53 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -288,10 +288,13 @@ private: /// Overwrite existing values when encountering the same key again bool any_take_last_row; - /** Blocks of "right" table. - */ + /// Blocks of "right" table. BlocksList blocks; + /// Nullmaps for blocks of "right" table (if needed) + using BlockNullmapList = std::list>; + BlockNullmapList blocks_nullmaps; + MapsVariant maps; /// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows. diff --git a/dbms/src/Interpreters/NullableUtils.cpp b/dbms/src/Interpreters/NullableUtils.cpp index 590fc68a573..9299a591afd 100644 --- a/dbms/src/Interpreters/NullableUtils.cpp +++ b/dbms/src/Interpreters/NullableUtils.cpp @@ -4,13 +4,16 @@ namespace DB { -void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map) +ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map) { + ColumnPtr null_map_holder; + if (key_columns.size() == 1) { auto & column = key_columns[0]; if (auto * column_nullable = checkAndGetColumn(*column)) { + null_map_holder = column_nullable->getNullMapColumnPtr(); null_map = &column_nullable->getNullMapData(); column = &column_nullable->getNestedColumn(); } @@ -43,6 +46,8 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul null_map = null_map_holder ? &static_cast(*null_map_holder).getData() : nullptr; } + + return null_map_holder; } } diff --git a/dbms/src/Interpreters/NullableUtils.h b/dbms/src/Interpreters/NullableUtils.h index e0689b853d7..ee3193919cd 100644 --- a/dbms/src/Interpreters/NullableUtils.h +++ b/dbms/src/Interpreters/NullableUtils.h @@ -6,8 +6,8 @@ namespace DB /** Replace Nullable key_columns to corresponding nested columns. * In 'null_map' return a map of positions where at least one column was NULL. - * null_map_holder could take ownership of null_map, if required. + * @returns ownership column of null_map. */ -void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map); +ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map); } diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 98705caa949..7840a314380 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -127,9 +127,8 @@ void Set::setHeader(const Block & block) } /// We will insert to the Set only keys, where all components are not NULL. - ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); if (fill_set_elements) { @@ -168,9 +167,8 @@ bool Set::insertFromBlock(const Block & block) size_t rows = block.rows(); /// We will insert to the Set only keys, where all components are not NULL. - ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); /// Filter to extract distinct values from the block. ColumnUInt8::MutablePtr filter; @@ -349,9 +347,8 @@ ColumnPtr Set::execute(const Block & block, bool negative) const } /// We will check existence in Set only for keys, where all components are not NULL. - ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); executeOrdinary(key_columns, vec_res, negative, null_map); diff --git a/dbms/tests/queries/0_stateless/00875_join_right_nulls.reference b/dbms/tests/queries/0_stateless/00875_join_right_nulls.reference new file mode 100644 index 00000000000..e9b008c2433 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00875_join_right_nulls.reference @@ -0,0 +1,22 @@ +n rj n 1 1 +n rj n id id +n rj n \N \N +n fj n 1 1 +n fj n id id +n fj n \N \N +n fj n \N \N +t rj n \N +t rj n 1 1 +t rj n id id +t fj n \N +t fj n 1 1 +t fj n id id +n rj t 1 1 +n rj t id id +n fj t 1 1 +n fj t id id +n fj t \N \N +0 2 +1001 2 +1001 2 +2002 2 diff --git a/dbms/tests/queries/0_stateless/00875_join_right_nulls.sql b/dbms/tests/queries/0_stateless/00875_join_right_nulls.sql new file mode 100644 index 00000000000..fb01bdfb45c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00875_join_right_nulls.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS t; +DROP TABLE IF EXISTS nt; + +CREATE TABLE t (x String) ENGINE = Memory; +CREATE TABLE nt (x Nullable(String)) ENGINE = Memory; + +INSERT INTO t (x) VALUES ('id'), ('1'); +INSERT INTO nt (x) VALUES ('id'), (NULL), ('1'); + +SET join_use_nulls = 1; + +SELECT 'n rj n', t1.x, t2.x FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x; +SELECT 'n fj n', t1.x, t2.x FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x; + +SELECT 't rj n', t1.x, t2.x FROM t AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x; +SELECT 't fj n', t1.x, t2.x FROM t AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x; + +SELECT 'n rj t', t1.x, t2.x FROM nt AS t1 RIGHT JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x; +SELECT 'n fj t', t1.x, t2.x FROM nt AS t1 FULL JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x; + +INSERT INTO nt (x) SELECT NULL as x FROM numbers(1000); +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 INNER JOIN nt AS t2 ON t1.x = t2.x; +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 LEFT JOIN nt AS t2 ON t1.x = t2.x; +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x; +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x; + +DROP TABLE t; +DROP TABLE nt; From 4759e8632e80bca2b9523b4f39c455d97d516a8e Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 4 Jul 2019 14:50:32 +0300 Subject: [PATCH 2/4] fix crash and update test results --- dbms/src/Interpreters/Join.cpp | 6 ++--- .../00445_join_nullable_keys.reference | 1 + .../0_stateless/00702_join_on_dups.reference | 20 ++++++++++++++++ .../00853_join_with_nulls_crash.reference | 3 +++ .../00875_join_right_nulls.reference | 24 +++++++++++++++++++ .../0_stateless/00875_join_right_nulls.sql | 20 ++++++++++++++++ 6 files changed, 71 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 43b63cee251..e3b5048354d 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -1328,14 +1328,14 @@ private: if (!joinDispatch(parent.kind, parent.strictness, parent.maps, fill_callback)) throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR); - /// Revert columns nullability - changeNullability(columns_keys_and_right, key_nullability_changes); - fillNullsFromBlocks(columns_keys_and_right, rows_added); if (!rows_added) return {}; + /// Revert columns nullability + changeNullability(columns_keys_and_right, key_nullability_changes); + Block res = result_sample_block.cloneEmpty(); /// @note it's possible to make ColumnConst here and materialize it later diff --git a/dbms/tests/queries/0_stateless/00445_join_nullable_keys.reference b/dbms/tests/queries/0_stateless/00445_join_nullable_keys.reference index 20673c90278..f7675766dc9 100644 --- a/dbms/tests/queries/0_stateless/00445_join_nullable_keys.reference +++ b/dbms/tests/queries/0_stateless/00445_join_nullable_keys.reference @@ -21,3 +21,4 @@ 12 12 13 13 14 14 +\N 8 diff --git a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference index 769d2941564..d874ac1f0cf 100644 --- a/dbms/tests/queries/0_stateless/00702_join_on_dups.reference +++ b/dbms/tests/queries/0_stateless/00702_join_on_dups.reference @@ -206,11 +206,21 @@ self right 8 l8 \N 8 l8 \N 9 l9 \N 9 l9 \N self right nullable +0 \N 4 l5 \N +0 \N 4 l6 \N +0 \N 5 l7 \N +0 \N 8 l8 \N +0 \N 9 l9 \N 1 l1 1 1 l1 1 2 l2 2 2 l2 2 2 l3 3 2 l3 3 3 l4 4 3 l4 4 self right nullable vs not nullable +0 \N 4 l5 \N +0 \N 4 l6 \N +0 \N 5 l7 \N +0 \N 8 l8 \N +0 \N 9 l9 \N 1 l1 1 1 l1 1 2 l2 2 2 l2 2 2 l3 3 2 l2 2 @@ -232,6 +242,11 @@ self full 8 l8 \N 8 l8 \N 9 l9 \N 9 l9 \N self full nullable +0 \N 4 l5 \N +0 \N 4 l6 \N +0 \N 5 l7 \N +0 \N 8 l8 \N +0 \N 9 l9 \N 1 l1 1 1 l1 1 2 l2 2 2 l2 2 2 l3 3 2 l3 3 @@ -242,6 +257,11 @@ self full nullable 8 l8 \N 0 \N 9 l9 \N 0 \N self full nullable vs not nullable +0 \N 4 l5 \N +0 \N 4 l6 \N +0 \N 5 l7 \N +0 \N 8 l8 \N +0 \N 9 l9 \N 1 l1 1 1 l1 1 2 l2 2 2 l2 2 2 l3 3 2 l2 2 diff --git a/dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.reference b/dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.reference index b6168923b09..c23c6409aa2 100644 --- a/dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.reference +++ b/dbms/tests/queries/0_stateless/00853_join_with_nulls_crash.reference @@ -9,8 +9,11 @@ foo \N 2 0 Nullable(String) Nullable(String) foo 2 0 String Nullable(String) bar bar 1 2 String Nullable(String) test 0 1 String Nullable(String) + \N 0 1 String Nullable(String) bar bar 1 2 String Nullable(String) test 0 1 String Nullable(String) + \N 0 1 String Nullable(String) foo 2 0 String bar 1 2 String test 0 1 String + 0 1 String diff --git a/dbms/tests/queries/0_stateless/00875_join_right_nulls.reference b/dbms/tests/queries/0_stateless/00875_join_right_nulls.reference index e9b008c2433..01932c3f068 100644 --- a/dbms/tests/queries/0_stateless/00875_join_right_nulls.reference +++ b/dbms/tests/queries/0_stateless/00875_join_right_nulls.reference @@ -1,3 +1,23 @@ +on +n rj n 1 1 +n rj n id id +n rj n \N \N +n fj n 1 1 +n fj n id id +n fj n \N \N +n fj n \N \N +t rj n \N +t rj n 1 1 +t rj n id id +t fj n \N +t fj n 1 1 +t fj n id id +n rj t 1 1 +n rj t id id +n fj t 1 1 +n fj t id id +n fj t \N \N +using n rj n 1 1 n rj n id id n rj n \N \N @@ -20,3 +40,7 @@ n fj t \N \N 1001 2 1001 2 2002 2 +0 2 +1001 2 +1001 2 +2002 2 diff --git a/dbms/tests/queries/0_stateless/00875_join_right_nulls.sql b/dbms/tests/queries/0_stateless/00875_join_right_nulls.sql index fb01bdfb45c..288a3043569 100644 --- a/dbms/tests/queries/0_stateless/00875_join_right_nulls.sql +++ b/dbms/tests/queries/0_stateless/00875_join_right_nulls.sql @@ -9,6 +9,8 @@ INSERT INTO nt (x) VALUES ('id'), (NULL), ('1'); SET join_use_nulls = 1; +SELECT 'on'; + SELECT 'n rj n', t1.x, t2.x FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x; SELECT 'n fj n', t1.x, t2.x FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x; @@ -18,11 +20,29 @@ SELECT 't fj n', t1.x, t2.x FROM t AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER SELECT 'n rj t', t1.x, t2.x FROM nt AS t1 RIGHT JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x; SELECT 'n fj t', t1.x, t2.x FROM nt AS t1 FULL JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x; +SELECT 'using'; + +SELECT 'n rj n', t1.x, t2.x FROM nt AS t1 RIGHT JOIN nt AS t2 USING(x) ORDER BY t1.x; +SELECT 'n fj n', t1.x, t2.x FROM nt AS t1 FULL JOIN nt AS t2 USING(x) ORDER BY t1.x; + +SELECT 't rj n', t1.x, t2.x FROM t AS t1 RIGHT JOIN nt AS t2 USING(x) ORDER BY t1.x; +SELECT 't fj n', t1.x, t2.x FROM t AS t1 FULL JOIN nt AS t2 USING(x) ORDER BY t1.x; + +SELECT 'n rj t', t1.x, t2.x FROM nt AS t1 RIGHT JOIN t AS t2 USING(x) ORDER BY t1.x; +SELECT 'n fj t', t1.x, t2.x FROM nt AS t1 FULL JOIN t AS t2 USING(x) ORDER BY t1.x; + + INSERT INTO nt (x) SELECT NULL as x FROM numbers(1000); + SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 INNER JOIN nt AS t2 ON t1.x = t2.x; SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 LEFT JOIN nt AS t2 ON t1.x = t2.x; SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x; SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x; +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 INNER JOIN nt AS t2 USING(x); +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 LEFT JOIN nt AS t2 USING(x); +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 RIGHT JOIN nt AS t2 USING(x); +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 FULL JOIN nt AS t2 USING(x); + DROP TABLE t; DROP TABLE nt; From 301345724a772398898427dabb6d0ba203cdc902 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 4 Jul 2019 15:12:39 +0300 Subject: [PATCH 3/4] use better data types --- dbms/src/Common/ErasedType.h | 24 ------------------------ dbms/src/Interpreters/Join.cpp | 29 ++++++++++++----------------- dbms/src/Interpreters/Join.h | 3 ++- 3 files changed, 14 insertions(+), 42 deletions(-) delete mode 100644 dbms/src/Common/ErasedType.h diff --git a/dbms/src/Common/ErasedType.h b/dbms/src/Common/ErasedType.h deleted file mode 100644 index 7f4c7ce0fbd..00000000000 --- a/dbms/src/Common/ErasedType.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -struct ErasedType -{ - using Ptr = std::unique_ptr>; - - template - static Ptr create(const Args & ... args) - { - return Ptr(static_cast(new T(args...)), [](void * ptr) { delete reinterpret_cast(ptr); }); - } - - template - static T & get(Ptr & ptr) - { - return *reinterpret_cast(ptr.get()); - } -}; - -} diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index e3b5048354d..929560fb2cb 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -1,3 +1,5 @@ +#include + #include #include @@ -16,7 +18,6 @@ #include #include -#include #include @@ -1172,7 +1173,6 @@ public: const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size_) : parent(parent_) , max_block_size(max_block_size_) - , nulls_it(dirtyIterator()) { /** left_sample_block contains keys and "left" columns. * result_sample_block - keys, "left" columns, and "right" columns. @@ -1256,14 +1256,9 @@ private: /// Which key columns need change nullability (right is nullable and left is not or vice versa) std::vector key_nullability_changes; - ErasedType::Ptr position; - Join::BlockNullmapList::const_iterator nulls_it; + std::any position; + std::optional nulls_position; - static Join::BlockNullmapList::const_iterator dirtyIterator() - { - static const Join::BlockNullmapList dirty{}; - return dirty.end(); - } void makeResultSampleBlock(const Block & left_sample_block, const Block & right_sample_block, const NamesAndTypesList & columns_added_by_join, @@ -1389,10 +1384,10 @@ private: size_t rows_added = 0; - if (!position) - position = ErasedType::create(map.begin()); + if (!position.has_value()) + position = std::make_any(map.begin()); - Iterator & it = ErasedType::get(position); + Iterator & it = std::any_cast(position); auto end = map.end(); for (; it != end; ++it) @@ -1415,15 +1410,15 @@ private: void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added) { - if (nulls_it == dirtyIterator()) - nulls_it = parent.blocks_nullmaps.begin(); + if (!nulls_position.has_value()) + nulls_position = parent.blocks_nullmaps.begin(); auto end = parent.blocks_nullmaps.end(); - for (; nulls_it != end && rows_added < max_block_size; ++nulls_it) + for (auto & it = *nulls_position; it != end && rows_added < max_block_size; ++it) { - const Block * block = nulls_it->first; - const NullMap & nullmap = static_cast(*nulls_it->second).getData(); + const Block * block = it->first; + const NullMap & nullmap = static_cast(*it->second).getData(); for (size_t row = 0; row < nullmap.size(); ++row) { diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 850769a7c53..f57755fad91 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -292,7 +293,7 @@ private: BlocksList blocks; /// Nullmaps for blocks of "right" table (if needed) - using BlockNullmapList = std::list>; + using BlockNullmapList = std::deque>; BlockNullmapList blocks_nullmaps; MapsVariant maps; From 8dc681079956461bf7935b95d0fcc6b783545f9e Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 4 Jul 2019 15:38:54 +0300 Subject: [PATCH 4/4] do not save null bitmaps for blocks without nulls --- dbms/src/Interpreters/Join.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 929560fb2cb..cf23aef5b67 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -568,7 +568,14 @@ bool Join::insertFromBlock(const Block & block) /// If RIGHT or FULL save blocks with nulls for NonJoinedBlockInputStream if (isRightOrFull(kind) && null_map) - blocks_nullmaps.emplace_back(stored_block, null_map_holder); + { + UInt8 has_null = 0; + for (size_t i = 0; !has_null && i < null_map->size(); ++i) + has_null |= (*null_map)[i]; + + if (has_null) + blocks_nullmaps.emplace_back(stored_block, null_map_holder); + } return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); }