diff --git a/dbms/src/Common/ErasedType.h b/dbms/src/Common/ErasedType.h new file mode 100644 index 00000000000..7f4c7ce0fbd --- /dev/null +++ b/dbms/src/Common/ErasedType.h @@ -0,0 +1,24 @@ +#pragma once +#include + +namespace DB +{ + +struct ErasedType +{ + using Ptr = std::unique_ptr>; + + template + static Ptr create(const Args & ... args) + { + return Ptr(static_cast(new T(args...)), [](void * ptr) { delete reinterpret_cast(ptr); }); + } + + template + static T & get(Ptr & ptr) + { + return *reinterpret_cast(ptr.get()); + } +}; + +} diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 0c55d20d760..43b63cee251 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -16,6 +16,7 @@ #include #include +#include #include @@ -531,9 +532,8 @@ bool Join::insertFromBlock(const Block & block) } /// We will insert to the map only keys, where all components are not NULL. - ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); size_t rows = block.rows(); @@ -565,6 +565,10 @@ bool Join::insertFromBlock(const Block & block) }); } + /// If RIGHT or FULL save blocks with nulls for NonJoinedBlockInputStream + if (isRightOrFull(kind) && null_map) + blocks_nullmaps.emplace_back(stored_block, null_map_holder); + return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); } @@ -797,9 +801,8 @@ void Join::joinBlockImpl( } /// Keys with NULL value in any column won't join to anything. - ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); size_t existing_columns = block.columns(); @@ -1167,7 +1170,9 @@ class NonJoinedBlockInputStream : public IBlockInputStream public: NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size_) - : parent(parent_), max_block_size(max_block_size_) + : parent(parent_) + , max_block_size(max_block_size_) + , nulls_it(dirtyIterator()) { /** left_sample_block contains keys and "left" columns. * result_sample_block - keys, "left" columns, and "right" columns. @@ -1235,12 +1240,7 @@ protected: { if (parent.blocks.empty()) return Block(); - - Block block; - if (!joinDispatch(parent.kind, parent.strictness, parent.maps, - [&](auto, auto strictness, auto & map) { block = createBlock(map); })) - throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR); - return block; + return createBlock(); } private: @@ -1256,8 +1256,14 @@ private: /// Which key columns need change nullability (right is nullable and left is not or vice versa) std::vector key_nullability_changes; - std::unique_ptr> position; /// type erasure + ErasedType::Ptr position; + Join::BlockNullmapList::const_iterator nulls_it; + static Join::BlockNullmapList::const_iterator dirtyIterator() + { + static const Join::BlockNullmapList dirty{}; + return dirty.end(); + } void makeResultSampleBlock(const Block & left_sample_block, const Block & right_sample_block, const NamesAndTypesList & columns_added_by_join, @@ -1304,8 +1310,7 @@ private: } } - template - Block createBlock(const Maps & maps) + Block createBlock() { MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left); MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right); @@ -1315,25 +1320,22 @@ private: size_t rows_added = 0; - switch (parent.type) + auto fill_callback = [&](auto, auto strictness, auto & map) { - #define M(TYPE) \ - case Join::Type::TYPE: \ - rows_added = fillColumns(*maps.TYPE, columns_keys_and_right); \ - break; - APPLY_FOR_JOIN_VARIANTS(M) - #undef M + rows_added = fillColumnsFromMap(map, columns_keys_and_right); + }; - default: - throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); - } - - if (!rows_added) - return {}; + if (!joinDispatch(parent.kind, parent.strictness, parent.maps, fill_callback)) + throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR); /// Revert columns nullability changeNullability(columns_keys_and_right, key_nullability_changes); + fillNullsFromBlocks(columns_keys_and_right, rows_added); + + if (!rows_added) + return {}; + Block res = result_sample_block.cloneEmpty(); /// @note it's possible to make ColumnConst here and materialize it later @@ -1362,25 +1364,44 @@ private: return columns; } + template + size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right) + { + switch (parent.type) + { + #define M(TYPE) \ + case Join::Type::TYPE: \ + return fillColumns(*maps.TYPE, columns_keys_and_right); + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + default: + throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); + } + + __builtin_unreachable(); + } + template size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right) { + using Mapped = typename Map::mapped_type; + using Iterator = typename Map::const_iterator; + size_t rows_added = 0; if (!position) - position = decltype(position)( - static_cast(new typename Map::const_iterator(map.begin())), //-V572 - [](void * ptr) { delete reinterpret_cast(ptr); }); + position = ErasedType::create(map.begin()); - auto & it = *reinterpret_cast(position.get()); + Iterator & it = ErasedType::get(position); auto end = map.end(); for (; it != end; ++it) { - if (it->getSecond().getUsed()) + const Mapped & mapped = it->getSecond(); + if (mapped.getUsed()) continue; - AdderNonJoined::add(it->getSecond(), rows_added, columns_keys_and_right); + AdderNonJoined::add(mapped, rows_added, columns_keys_and_right); if (rows_added >= max_block_size) { @@ -1392,6 +1413,30 @@ private: return rows_added; } + void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added) + { + if (nulls_it == dirtyIterator()) + nulls_it = parent.blocks_nullmaps.begin(); + + auto end = parent.blocks_nullmaps.end(); + + for (; nulls_it != end && rows_added < max_block_size; ++nulls_it) + { + const Block * block = nulls_it->first; + const NullMap & nullmap = static_cast(*nulls_it->second).getData(); + + for (size_t row = 0; row < nullmap.size(); ++row) + { + if (nullmap[row]) + { + for (size_t col = 0; col < columns_keys_and_right.size(); ++col) + columns_keys_and_right[col]->insertFrom(*block->getByPosition(col).column, row); + ++rows_added; + } + } + } + } + static std::unordered_set getNullabilityChanges(const Block & sample_block_with_keys, const Block & out_block, const std::vector & key_positions, const std::unordered_map & left_to_right_key_map) diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 6105edc68fe..850769a7c53 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -288,10 +288,13 @@ private: /// Overwrite existing values when encountering the same key again bool any_take_last_row; - /** Blocks of "right" table. - */ + /// Blocks of "right" table. BlocksList blocks; + /// Nullmaps for blocks of "right" table (if needed) + using BlockNullmapList = std::list>; + BlockNullmapList blocks_nullmaps; + MapsVariant maps; /// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows. diff --git a/dbms/src/Interpreters/NullableUtils.cpp b/dbms/src/Interpreters/NullableUtils.cpp index 590fc68a573..9299a591afd 100644 --- a/dbms/src/Interpreters/NullableUtils.cpp +++ b/dbms/src/Interpreters/NullableUtils.cpp @@ -4,13 +4,16 @@ namespace DB { -void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map) +ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map) { + ColumnPtr null_map_holder; + if (key_columns.size() == 1) { auto & column = key_columns[0]; if (auto * column_nullable = checkAndGetColumn(*column)) { + null_map_holder = column_nullable->getNullMapColumnPtr(); null_map = &column_nullable->getNullMapData(); column = &column_nullable->getNestedColumn(); } @@ -43,6 +46,8 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul null_map = null_map_holder ? &static_cast(*null_map_holder).getData() : nullptr; } + + return null_map_holder; } } diff --git a/dbms/src/Interpreters/NullableUtils.h b/dbms/src/Interpreters/NullableUtils.h index e0689b853d7..ee3193919cd 100644 --- a/dbms/src/Interpreters/NullableUtils.h +++ b/dbms/src/Interpreters/NullableUtils.h @@ -6,8 +6,8 @@ namespace DB /** Replace Nullable key_columns to corresponding nested columns. * In 'null_map' return a map of positions where at least one column was NULL. - * null_map_holder could take ownership of null_map, if required. + * @returns ownership column of null_map. */ -void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map); +ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map); } diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 98705caa949..7840a314380 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -127,9 +127,8 @@ void Set::setHeader(const Block & block) } /// We will insert to the Set only keys, where all components are not NULL. - ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); if (fill_set_elements) { @@ -168,9 +167,8 @@ bool Set::insertFromBlock(const Block & block) size_t rows = block.rows(); /// We will insert to the Set only keys, where all components are not NULL. - ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); /// Filter to extract distinct values from the block. ColumnUInt8::MutablePtr filter; @@ -349,9 +347,8 @@ ColumnPtr Set::execute(const Block & block, bool negative) const } /// We will check existence in Set only for keys, where all components are not NULL. - ColumnPtr null_map_holder; ConstNullMapPtr null_map{}; - extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map); + ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map); executeOrdinary(key_columns, vec_res, negative, null_map); diff --git a/dbms/tests/queries/0_stateless/00875_join_right_nulls.reference b/dbms/tests/queries/0_stateless/00875_join_right_nulls.reference new file mode 100644 index 00000000000..e9b008c2433 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00875_join_right_nulls.reference @@ -0,0 +1,22 @@ +n rj n 1 1 +n rj n id id +n rj n \N \N +n fj n 1 1 +n fj n id id +n fj n \N \N +n fj n \N \N +t rj n \N +t rj n 1 1 +t rj n id id +t fj n \N +t fj n 1 1 +t fj n id id +n rj t 1 1 +n rj t id id +n fj t 1 1 +n fj t id id +n fj t \N \N +0 2 +1001 2 +1001 2 +2002 2 diff --git a/dbms/tests/queries/0_stateless/00875_join_right_nulls.sql b/dbms/tests/queries/0_stateless/00875_join_right_nulls.sql new file mode 100644 index 00000000000..fb01bdfb45c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00875_join_right_nulls.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS t; +DROP TABLE IF EXISTS nt; + +CREATE TABLE t (x String) ENGINE = Memory; +CREATE TABLE nt (x Nullable(String)) ENGINE = Memory; + +INSERT INTO t (x) VALUES ('id'), ('1'); +INSERT INTO nt (x) VALUES ('id'), (NULL), ('1'); + +SET join_use_nulls = 1; + +SELECT 'n rj n', t1.x, t2.x FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x; +SELECT 'n fj n', t1.x, t2.x FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x; + +SELECT 't rj n', t1.x, t2.x FROM t AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x; +SELECT 't fj n', t1.x, t2.x FROM t AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x; + +SELECT 'n rj t', t1.x, t2.x FROM nt AS t1 RIGHT JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x; +SELECT 'n fj t', t1.x, t2.x FROM nt AS t1 FULL JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x; + +INSERT INTO nt (x) SELECT NULL as x FROM numbers(1000); +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 INNER JOIN nt AS t2 ON t1.x = t2.x; +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 LEFT JOIN nt AS t2 ON t1.x = t2.x; +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x; +SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x; + +DROP TABLE t; +DROP TABLE nt;