mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 16:50:48 +00:00
Merge pull request #5859 from 4ertus2/joins
Fix FULL|RIGHT JOIN with nulls in right table's keys
This commit is contained in:
commit
8b62c96989
@ -1,3 +1,5 @@
|
||||
#include <any>
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
#include <Columns/ColumnConst.h>
|
||||
@ -531,9 +533,8 @@ bool Join::insertFromBlock(const Block & block)
|
||||
}
|
||||
|
||||
/// We will insert to the map only keys, where all components are not NULL.
|
||||
ColumnPtr null_map_holder;
|
||||
ConstNullMapPtr null_map{};
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
|
||||
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
size_t rows = block.rows();
|
||||
|
||||
@ -565,6 +566,17 @@ bool Join::insertFromBlock(const Block & block)
|
||||
});
|
||||
}
|
||||
|
||||
/// If RIGHT or FULL save blocks with nulls for NonJoinedBlockInputStream
|
||||
if (isRightOrFull(kind) && null_map)
|
||||
{
|
||||
UInt8 has_null = 0;
|
||||
for (size_t i = 0; !has_null && i < null_map->size(); ++i)
|
||||
has_null |= (*null_map)[i];
|
||||
|
||||
if (has_null)
|
||||
blocks_nullmaps.emplace_back(stored_block, null_map_holder);
|
||||
}
|
||||
|
||||
return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
|
||||
}
|
||||
|
||||
@ -797,9 +809,8 @@ void Join::joinBlockImpl(
|
||||
}
|
||||
|
||||
/// Keys with NULL value in any column won't join to anything.
|
||||
ColumnPtr null_map_holder;
|
||||
ConstNullMapPtr null_map{};
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
|
||||
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
size_t existing_columns = block.columns();
|
||||
|
||||
@ -1167,7 +1178,8 @@ class NonJoinedBlockInputStream : public IBlockInputStream
|
||||
public:
|
||||
NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left,
|
||||
const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size_)
|
||||
: parent(parent_), max_block_size(max_block_size_)
|
||||
: parent(parent_)
|
||||
, max_block_size(max_block_size_)
|
||||
{
|
||||
/** left_sample_block contains keys and "left" columns.
|
||||
* result_sample_block - keys, "left" columns, and "right" columns.
|
||||
@ -1235,12 +1247,7 @@ protected:
|
||||
{
|
||||
if (parent.blocks.empty())
|
||||
return Block();
|
||||
|
||||
Block block;
|
||||
if (!joinDispatch(parent.kind, parent.strictness, parent.maps,
|
||||
[&](auto, auto strictness, auto & map) { block = createBlock<strictness>(map); }))
|
||||
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
|
||||
return block;
|
||||
return createBlock();
|
||||
}
|
||||
|
||||
private:
|
||||
@ -1256,7 +1263,8 @@ private:
|
||||
/// Which key columns need their nullability changed (right is nullable and left is not, or vice versa)
|
||||
std::vector<bool> key_nullability_changes;
|
||||
|
||||
std::unique_ptr<void, std::function<void(void *)>> position; /// type erasure
|
||||
std::any position;
|
||||
std::optional<Join::BlockNullmapList::const_iterator> nulls_position;
|
||||
|
||||
|
||||
void makeResultSampleBlock(const Block & left_sample_block, const Block & right_sample_block,
|
||||
@ -1304,8 +1312,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
|
||||
Block createBlock(const Maps & maps)
|
||||
Block createBlock()
|
||||
{
|
||||
MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left);
|
||||
MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right);
|
||||
@ -1315,18 +1322,15 @@ private:
|
||||
|
||||
size_t rows_added = 0;
|
||||
|
||||
switch (parent.type)
|
||||
auto fill_callback = [&](auto, auto strictness, auto & map)
|
||||
{
|
||||
#define M(TYPE) \
|
||||
case Join::Type::TYPE: \
|
||||
rows_added = fillColumns<STRICTNESS>(*maps.TYPE, columns_keys_and_right); \
|
||||
break;
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
rows_added = fillColumnsFromMap<strictness>(map, columns_keys_and_right);
|
||||
};
|
||||
|
||||
default:
|
||||
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
||||
}
|
||||
if (!joinDispatch(parent.kind, parent.strictness, parent.maps, fill_callback))
|
||||
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
fillNullsFromBlocks(columns_keys_and_right, rows_added);
|
||||
|
||||
if (!rows_added)
|
||||
return {};
|
||||
@ -1362,25 +1366,44 @@ private:
|
||||
return columns;
|
||||
}
|
||||
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
|
||||
size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
|
||||
{
|
||||
switch (parent.type)
|
||||
{
|
||||
#define M(TYPE) \
|
||||
case Join::Type::TYPE: \
|
||||
return fillColumns<STRICTNESS>(*maps.TYPE, columns_keys_and_right);
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
default:
|
||||
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
||||
}
|
||||
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename Map>
|
||||
size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right)
|
||||
{
|
||||
using Mapped = typename Map::mapped_type;
|
||||
using Iterator = typename Map::const_iterator;
|
||||
|
||||
size_t rows_added = 0;
|
||||
|
||||
if (!position)
|
||||
position = decltype(position)(
|
||||
static_cast<void *>(new typename Map::const_iterator(map.begin())), //-V572
|
||||
[](void * ptr) { delete reinterpret_cast<typename Map::const_iterator *>(ptr); });
|
||||
if (!position.has_value())
|
||||
position = std::make_any<Iterator>(map.begin());
|
||||
|
||||
auto & it = *reinterpret_cast<typename Map::const_iterator *>(position.get());
|
||||
Iterator & it = std::any_cast<Iterator &>(position);
|
||||
auto end = map.end();
|
||||
|
||||
for (; it != end; ++it)
|
||||
{
|
||||
if (it->getSecond().getUsed())
|
||||
const Mapped & mapped = it->getSecond();
|
||||
if (mapped.getUsed())
|
||||
continue;
|
||||
|
||||
AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->getSecond(), rows_added, columns_keys_and_right);
|
||||
AdderNonJoined<STRICTNESS, Mapped>::add(mapped, rows_added, columns_keys_and_right);
|
||||
|
||||
if (rows_added >= max_block_size)
|
||||
{
|
||||
@ -1392,6 +1415,30 @@ private:
|
||||
return rows_added;
|
||||
}
|
||||
|
||||
/// Append to columns_keys_and_right the rows of the saved (block, null map) entries in parent.blocks_nullmaps
/// whose null map value is non-zero, incrementing rows_added once per appended row.
/// The walk resumes at nulls_position (initialised to the first entry on first use). A new entry is started
/// only while rows_added is below max_block_size; an entry that has been started is always scanned to its end.
void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added)
{
    if (!nulls_position)
        nulls_position = parent.blocks_nullmaps.begin();

    /// Reference into the member: advancing it records where the next call continues.
    auto & cursor = *nulls_position;
    const auto last = parent.blocks_nullmaps.end();

    while (cursor != last && rows_added < max_block_size)
    {
        const Block * source_block = cursor->first;
        const NullMap & row_is_null = static_cast<const ColumnUInt8 &>(*cursor->second).getData();

        const size_t rows_in_block = row_is_null.size();
        const size_t columns_count = columns_keys_and_right.size();

        for (size_t row = 0; row < rows_in_block; ++row)
        {
            if (!row_is_null[row])
                continue;

            /// Destination column `col` is filled from the block column at the same position.
            for (size_t col = 0; col < columns_count; ++col)
                columns_keys_and_right[col]->insertFrom(*source_block->getByPosition(col).column, row);

            ++rows_added;
        }

        ++cursor;
    }
}
|
||||
|
||||
static std::unordered_set<size_t> getNullabilityChanges(const Block & sample_block_with_keys, const Block & out_block,
|
||||
const std::vector<size_t> & key_positions,
|
||||
const std::unordered_map<size_t, size_t> & left_to_right_key_map)
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <variant>
|
||||
#include <optional>
|
||||
#include <shared_mutex>
|
||||
#include <deque>
|
||||
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
|
||||
@ -288,10 +289,13 @@ private:
|
||||
/// Overwrite existing values when encountering the same key again
|
||||
bool any_take_last_row;
|
||||
|
||||
/** Blocks of "right" table.
|
||||
*/
|
||||
/// Blocks of "right" table.
|
||||
BlocksList blocks;
|
||||
|
||||
/// Nullmaps for blocks of "right" table (if needed)
|
||||
using BlockNullmapList = std::deque<std::pair<const Block *, ColumnPtr>>;
|
||||
BlockNullmapList blocks_nullmaps;
|
||||
|
||||
MapsVariant maps;
|
||||
|
||||
/// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows.
|
||||
|
@ -4,13 +4,16 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map)
|
||||
ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map)
|
||||
{
|
||||
ColumnPtr null_map_holder;
|
||||
|
||||
if (key_columns.size() == 1)
|
||||
{
|
||||
auto & column = key_columns[0];
|
||||
if (auto * column_nullable = checkAndGetColumn<ColumnNullable>(*column))
|
||||
{
|
||||
null_map_holder = column_nullable->getNullMapColumnPtr();
|
||||
null_map = &column_nullable->getNullMapData();
|
||||
column = &column_nullable->getNestedColumn();
|
||||
}
|
||||
@ -43,6 +46,8 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul
|
||||
|
||||
null_map = null_map_holder ? &static_cast<const ColumnUInt8 &>(*null_map_holder).getData() : nullptr;
|
||||
}
|
||||
|
||||
return null_map_holder;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -6,8 +6,8 @@ namespace DB
|
||||
|
||||
/** Replace Nullable key_columns with their corresponding nested columns.
|
||||
* In 'null_map' return a map of positions where at least one column was NULL.
|
||||
* null_map_holder could take ownership of null_map, if required.
|
||||
* @returns the column that owns the data 'null_map' points to (empty if there is no null map).
|
||||
*/
|
||||
void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map);
|
||||
ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map);
|
||||
|
||||
}
|
||||
|
@ -127,9 +127,8 @@ void Set::setHeader(const Block & block)
|
||||
}
|
||||
|
||||
/// We will insert to the Set only keys, where all components are not NULL.
|
||||
ColumnPtr null_map_holder;
|
||||
ConstNullMapPtr null_map{};
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
|
||||
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
if (fill_set_elements)
|
||||
{
|
||||
@ -168,9 +167,8 @@ bool Set::insertFromBlock(const Block & block)
|
||||
size_t rows = block.rows();
|
||||
|
||||
/// We will insert to the Set only keys, where all components are not NULL.
|
||||
ColumnPtr null_map_holder;
|
||||
ConstNullMapPtr null_map{};
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
|
||||
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
/// Filter to extract distinct values from the block.
|
||||
ColumnUInt8::MutablePtr filter;
|
||||
@ -349,9 +347,8 @@ ColumnPtr Set::execute(const Block & block, bool negative) const
|
||||
}
|
||||
|
||||
/// We will check existence in Set only for keys, where all components are not NULL.
|
||||
ColumnPtr null_map_holder;
|
||||
ConstNullMapPtr null_map{};
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
|
||||
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
executeOrdinary(key_columns, vec_res, negative, null_map);
|
||||
|
||||
|
@ -21,3 +21,4 @@
|
||||
12 12
|
||||
13 13
|
||||
14 14
|
||||
\N 8
|
||||
|
@ -206,11 +206,21 @@ self right
|
||||
8 l8 \N 8 l8 \N
|
||||
9 l9 \N 9 l9 \N
|
||||
self right nullable
|
||||
0 \N 4 l5 \N
|
||||
0 \N 4 l6 \N
|
||||
0 \N 5 l7 \N
|
||||
0 \N 8 l8 \N
|
||||
0 \N 9 l9 \N
|
||||
1 l1 1 1 l1 1
|
||||
2 l2 2 2 l2 2
|
||||
2 l3 3 2 l3 3
|
||||
3 l4 4 3 l4 4
|
||||
self right nullable vs not nullable
|
||||
0 \N 4 l5 \N
|
||||
0 \N 4 l6 \N
|
||||
0 \N 5 l7 \N
|
||||
0 \N 8 l8 \N
|
||||
0 \N 9 l9 \N
|
||||
1 l1 1 1 l1 1
|
||||
2 l2 2 2 l2 2
|
||||
2 l3 3 2 l2 2
|
||||
@ -232,6 +242,11 @@ self full
|
||||
8 l8 \N 8 l8 \N
|
||||
9 l9 \N 9 l9 \N
|
||||
self full nullable
|
||||
0 \N 4 l5 \N
|
||||
0 \N 4 l6 \N
|
||||
0 \N 5 l7 \N
|
||||
0 \N 8 l8 \N
|
||||
0 \N 9 l9 \N
|
||||
1 l1 1 1 l1 1
|
||||
2 l2 2 2 l2 2
|
||||
2 l3 3 2 l3 3
|
||||
@ -242,6 +257,11 @@ self full nullable
|
||||
8 l8 \N 0 \N
|
||||
9 l9 \N 0 \N
|
||||
self full nullable vs not nullable
|
||||
0 \N 4 l5 \N
|
||||
0 \N 4 l6 \N
|
||||
0 \N 5 l7 \N
|
||||
0 \N 8 l8 \N
|
||||
0 \N 9 l9 \N
|
||||
1 l1 1 1 l1 1
|
||||
2 l2 2 2 l2 2
|
||||
2 l3 3 2 l2 2
|
||||
|
@ -9,8 +9,11 @@ foo \N 2 0 Nullable(String) Nullable(String)
|
||||
foo 2 0 String Nullable(String)
|
||||
bar bar 1 2 String Nullable(String)
|
||||
test 0 1 String Nullable(String)
|
||||
\N 0 1 String Nullable(String)
|
||||
bar bar 1 2 String Nullable(String)
|
||||
test 0 1 String Nullable(String)
|
||||
\N 0 1 String Nullable(String)
|
||||
foo 2 0 String
|
||||
bar 1 2 String
|
||||
test 0 1 String
|
||||
0 1 String
|
||||
|
@ -0,0 +1,46 @@
|
||||
on
|
||||
n rj n 1 1
|
||||
n rj n id id
|
||||
n rj n \N \N
|
||||
n fj n 1 1
|
||||
n fj n id id
|
||||
n fj n \N \N
|
||||
n fj n \N \N
|
||||
t rj n \N
|
||||
t rj n 1 1
|
||||
t rj n id id
|
||||
t fj n \N
|
||||
t fj n 1 1
|
||||
t fj n id id
|
||||
n rj t 1 1
|
||||
n rj t id id
|
||||
n fj t 1 1
|
||||
n fj t id id
|
||||
n fj t \N \N
|
||||
using
|
||||
n rj n 1 1
|
||||
n rj n id id
|
||||
n rj n \N \N
|
||||
n fj n 1 1
|
||||
n fj n id id
|
||||
n fj n \N \N
|
||||
n fj n \N \N
|
||||
t rj n \N
|
||||
t rj n 1 1
|
||||
t rj n id id
|
||||
t fj n \N
|
||||
t fj n 1 1
|
||||
t fj n id id
|
||||
n rj t 1 1
|
||||
n rj t id id
|
||||
n fj t 1 1
|
||||
n fj t id id
|
||||
n fj t \N \N
|
||||
0 2
|
||||
1001 2
|
||||
1001 2
|
||||
2002 2
|
||||
0 2
|
||||
1001 2
|
||||
1001 2
|
||||
2002 2
|
48
dbms/tests/queries/0_stateless/00875_join_right_nulls.sql
Normal file
48
dbms/tests/queries/0_stateless/00875_join_right_nulls.sql
Normal file
@ -0,0 +1,48 @@
|
||||
DROP TABLE IF EXISTS t;
|
||||
DROP TABLE IF EXISTS nt;
|
||||
|
||||
CREATE TABLE t (x String) ENGINE = Memory;
|
||||
CREATE TABLE nt (x Nullable(String)) ENGINE = Memory;
|
||||
|
||||
INSERT INTO t (x) VALUES ('id'), ('1');
|
||||
INSERT INTO nt (x) VALUES ('id'), (NULL), ('1');
|
||||
|
||||
SET join_use_nulls = 1;
|
||||
|
||||
SELECT 'on';
|
||||
|
||||
SELECT 'n rj n', t1.x, t2.x FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
SELECT 'n fj n', t1.x, t2.x FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
|
||||
SELECT 't rj n', t1.x, t2.x FROM t AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
SELECT 't fj n', t1.x, t2.x FROM t AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
|
||||
SELECT 'n rj t', t1.x, t2.x FROM nt AS t1 RIGHT JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
SELECT 'n fj t', t1.x, t2.x FROM nt AS t1 FULL JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
|
||||
SELECT 'using';
|
||||
|
||||
SELECT 'n rj n', t1.x, t2.x FROM nt AS t1 RIGHT JOIN nt AS t2 USING(x) ORDER BY t1.x;
|
||||
SELECT 'n fj n', t1.x, t2.x FROM nt AS t1 FULL JOIN nt AS t2 USING(x) ORDER BY t1.x;
|
||||
|
||||
SELECT 't rj n', t1.x, t2.x FROM t AS t1 RIGHT JOIN nt AS t2 USING(x) ORDER BY t1.x;
|
||||
SELECT 't fj n', t1.x, t2.x FROM t AS t1 FULL JOIN nt AS t2 USING(x) ORDER BY t1.x;
|
||||
|
||||
SELECT 'n rj t', t1.x, t2.x FROM nt AS t1 RIGHT JOIN t AS t2 USING(x) ORDER BY t1.x;
|
||||
SELECT 'n fj t', t1.x, t2.x FROM nt AS t1 FULL JOIN t AS t2 USING(x) ORDER BY t1.x;
|
||||
|
||||
|
||||
INSERT INTO nt (x) SELECT NULL as x FROM numbers(1000);
|
||||
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 INNER JOIN nt AS t2 ON t1.x = t2.x;
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 LEFT JOIN nt AS t2 ON t1.x = t2.x;
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x;
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x;
|
||||
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 INNER JOIN nt AS t2 USING(x);
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 LEFT JOIN nt AS t2 USING(x);
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 RIGHT JOIN nt AS t2 USING(x);
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 FULL JOIN nt AS t2 USING(x);
|
||||
|
||||
DROP TABLE t;
|
||||
DROP TABLE nt;
|
Loading…
Reference in New Issue
Block a user