mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
join right table nulls
This commit is contained in:
parent
0a2c3aeaec
commit
73d6784c59
24
dbms/src/Common/ErasedType.h
Normal file
24
dbms/src/Common/ErasedType.h
Normal file
@ -0,0 +1,24 @@
|
||||
#pragma once
|
||||
#include <functional>
#include <memory>
#include <utility>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Minimal type-erased owning pointer.
  *
  * Stores an object of an arbitrary type T behind a 'void *' and remembers, inside the deleter,
  * how to destroy it. Whoever reads the object back must name the same T that was used to create it:
  * nothing here checks the type.
  */
struct ErasedType
{
    /// Owning pointer to an object whose static type is erased; the deleter knows the real type.
    using Ptr = std::unique_ptr<void, std::function<void(void *)>>;

    /** Allocates a T on the heap and returns it as a type-erased owning pointer.
      * Arguments are perfectly forwarded to T's constructor, so move-only and rvalue arguments are accepted.
      */
    template <typename T, typename... Args>
    static Ptr create(Args &&... args)
    {
        /// Build the deleter before allocating: a failure while building it cannot leak the new object,
        /// and if 'new' throws there is nothing to release yet.
        std::function<void(void *)> deleter = [](void * ptr) { delete static_cast<T *>(ptr); };
        return Ptr(static_cast<void *>(new T(std::forward<Args>(args)...)), std::move(deleter));
    }

    /** Returns a reference to the stored object, viewed as T.
      * 'ptr' must be non-empty and must have been produced by create<T> with the very same T;
      * otherwise the behaviour is undefined.
      */
    template <typename T>
    static T & get(Ptr & ptr)
    {
        /// static_cast is the proper cast for the void * -> T * round trip.
        return *static_cast<T *>(ptr.get());
    }
};
|
||||
|
||||
}
|
@ -16,6 +16,7 @@
|
||||
|
||||
#include <Core/ColumnNumbers.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/ErasedType.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
|
||||
|
||||
@ -531,9 +532,8 @@ bool Join::insertFromBlock(const Block & block)
|
||||
}
|
||||
|
||||
/// We will insert to the map only keys, where all components are not NULL.
|
||||
ColumnPtr null_map_holder;
|
||||
ConstNullMapPtr null_map{};
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
|
||||
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
size_t rows = block.rows();
|
||||
|
||||
@ -565,6 +565,10 @@ bool Join::insertFromBlock(const Block & block)
|
||||
});
|
||||
}
|
||||
|
||||
/// If RIGHT or FULL save blocks with nulls for NonJoinedBlockInputStream
|
||||
if (isRightOrFull(kind) && null_map)
|
||||
blocks_nullmaps.emplace_back(stored_block, null_map_holder);
|
||||
|
||||
return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
|
||||
}
|
||||
|
||||
@ -797,9 +801,8 @@ void Join::joinBlockImpl(
|
||||
}
|
||||
|
||||
/// Keys with NULL value in any column won't join to anything.
|
||||
ColumnPtr null_map_holder;
|
||||
ConstNullMapPtr null_map{};
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
|
||||
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
size_t existing_columns = block.columns();
|
||||
|
||||
@ -1167,7 +1170,9 @@ class NonJoinedBlockInputStream : public IBlockInputStream
|
||||
public:
|
||||
NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left,
|
||||
const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size_)
|
||||
: parent(parent_), max_block_size(max_block_size_)
|
||||
: parent(parent_)
|
||||
, max_block_size(max_block_size_)
|
||||
, nulls_it(dirtyIterator())
|
||||
{
|
||||
/** left_sample_block contains keys and "left" columns.
|
||||
* result_sample_block - keys, "left" columns, and "right" columns.
|
||||
@ -1235,12 +1240,7 @@ protected:
|
||||
{
|
||||
if (parent.blocks.empty())
|
||||
return Block();
|
||||
|
||||
Block block;
|
||||
if (!joinDispatch(parent.kind, parent.strictness, parent.maps,
|
||||
[&](auto, auto strictness, auto & map) { block = createBlock<strictness>(map); }))
|
||||
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
|
||||
return block;
|
||||
return createBlock();
|
||||
}
|
||||
|
||||
private:
|
||||
@ -1256,8 +1256,14 @@ private:
|
||||
/// Which key columns need change nullability (right is nullable and left is not or vice versa)
|
||||
std::vector<bool> key_nullability_changes;
|
||||
|
||||
std::unique_ptr<void, std::function<void(void *)>> position; /// type erasure
|
||||
ErasedType::Ptr position;
|
||||
Join::BlockNullmapList::const_iterator nulls_it;
|
||||
|
||||
/// Sentinel value for nulls_it meaning "iteration over parent.blocks_nullmaps has not started yet".
/// It is the end iterator of a private list that always stays empty.
static Join::BlockNullmapList::const_iterator dirtyIterator()
{
    static const Join::BlockNullmapList never_filled{};
    return never_filled.cend();
}
|
||||
|
||||
void makeResultSampleBlock(const Block & left_sample_block, const Block & right_sample_block,
|
||||
const NamesAndTypesList & columns_added_by_join,
|
||||
@ -1304,8 +1310,7 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
|
||||
Block createBlock(const Maps & maps)
|
||||
Block createBlock()
|
||||
{
|
||||
MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left);
|
||||
MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right);
|
||||
@ -1315,25 +1320,22 @@ private:
|
||||
|
||||
size_t rows_added = 0;
|
||||
|
||||
switch (parent.type)
|
||||
auto fill_callback = [&](auto, auto strictness, auto & map)
|
||||
{
|
||||
#define M(TYPE) \
|
||||
case Join::Type::TYPE: \
|
||||
rows_added = fillColumns<STRICTNESS>(*maps.TYPE, columns_keys_and_right); \
|
||||
break;
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
rows_added = fillColumnsFromMap<strictness>(map, columns_keys_and_right);
|
||||
};
|
||||
|
||||
default:
|
||||
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
||||
}
|
||||
|
||||
if (!rows_added)
|
||||
return {};
|
||||
if (!joinDispatch(parent.kind, parent.strictness, parent.maps, fill_callback))
|
||||
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// Revert columns nullability
|
||||
changeNullability(columns_keys_and_right, key_nullability_changes);
|
||||
|
||||
fillNullsFromBlocks(columns_keys_and_right, rows_added);
|
||||
|
||||
if (!rows_added)
|
||||
return {};
|
||||
|
||||
Block res = result_sample_block.cloneEmpty();
|
||||
|
||||
/// @note it's possible to make ColumnConst here and materialize it later
|
||||
@ -1362,25 +1364,44 @@ private:
|
||||
return columns;
|
||||
}
|
||||
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
|
||||
size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
|
||||
{
|
||||
switch (parent.type)
|
||||
{
|
||||
#define M(TYPE) \
|
||||
case Join::Type::TYPE: \
|
||||
return fillColumns<STRICTNESS>(*maps.TYPE, columns_keys_and_right);
|
||||
APPLY_FOR_JOIN_VARIANTS(M)
|
||||
#undef M
|
||||
default:
|
||||
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
|
||||
}
|
||||
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename Map>
|
||||
size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right)
|
||||
{
|
||||
using Mapped = typename Map::mapped_type;
|
||||
using Iterator = typename Map::const_iterator;
|
||||
|
||||
size_t rows_added = 0;
|
||||
|
||||
if (!position)
|
||||
position = decltype(position)(
|
||||
static_cast<void *>(new typename Map::const_iterator(map.begin())), //-V572
|
||||
[](void * ptr) { delete reinterpret_cast<typename Map::const_iterator *>(ptr); });
|
||||
position = ErasedType::create<Iterator>(map.begin());
|
||||
|
||||
auto & it = *reinterpret_cast<typename Map::const_iterator *>(position.get());
|
||||
Iterator & it = ErasedType::get<Iterator>(position);
|
||||
auto end = map.end();
|
||||
|
||||
for (; it != end; ++it)
|
||||
{
|
||||
if (it->getSecond().getUsed())
|
||||
const Mapped & mapped = it->getSecond();
|
||||
if (mapped.getUsed())
|
||||
continue;
|
||||
|
||||
AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->getSecond(), rows_added, columns_keys_and_right);
|
||||
AdderNonJoined<STRICTNESS, Mapped>::add(mapped, rows_added, columns_keys_and_right);
|
||||
|
||||
if (rows_added >= max_block_size)
|
||||
{
|
||||
@ -1392,6 +1413,30 @@ private:
|
||||
return rows_added;
|
||||
}
|
||||
|
||||
/** Appends to 'columns_keys_and_right' the rows of the saved right-table blocks whose null map entry is set,
  * increasing 'rows_added' by one per appended row.
  *
  * Iteration state is kept in nulls_it, so successive calls continue from the block after the last one consumed.
  * The size limit is checked only between blocks: a block is always consumed whole,
  * so 'rows_added' may end up above max_block_size.
  *
  * NOTE(review): once nulls_it points into parent.blocks_nullmaps, the sentinel check below compares
  * iterators that belong to two different lists - confirm this is acceptable for the target standard library.
  */
void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added)
{
    /// The sentinel means this is the first call: start from the first saved block.
    if (nulls_it == dirtyIterator())
        nulls_it = parent.blocks_nullmaps.begin();

    const auto nullmaps_end = parent.blocks_nullmaps.end();
    const size_t num_columns = columns_keys_and_right.size();

    while (nulls_it != nullmaps_end && rows_added < max_block_size)
    {
        const Block * source_block = nulls_it->first;
        const NullMap & key_is_null = static_cast<const ColumnUInt8 &>(*nulls_it->second).getData();
        const size_t num_rows = key_is_null.size();

        for (size_t row = 0; row != num_rows; ++row)
        {
            if (!key_is_null[row])
                continue;

            /// Columns are matched by position between the saved block and the output columns.
            for (size_t col = 0; col != num_columns; ++col)
                columns_keys_and_right[col]->insertFrom(*source_block->getByPosition(col).column, row);
            ++rows_added;
        }

        ++nulls_it;
    }
}
|
||||
|
||||
static std::unordered_set<size_t> getNullabilityChanges(const Block & sample_block_with_keys, const Block & out_block,
|
||||
const std::vector<size_t> & key_positions,
|
||||
const std::unordered_map<size_t, size_t> & left_to_right_key_map)
|
||||
|
@ -288,10 +288,13 @@ private:
|
||||
/// Overwrite existing values when encountering the same key again
|
||||
bool any_take_last_row;
|
||||
|
||||
/** Blocks of "right" table.
|
||||
*/
|
||||
/// Blocks of "right" table.
|
||||
BlocksList blocks;
|
||||
|
||||
/// Nullmaps for blocks of "right" table (if needed)
|
||||
using BlockNullmapList = std::list<std::pair<const Block *, ColumnPtr>>;
|
||||
BlockNullmapList blocks_nullmaps;
|
||||
|
||||
MapsVariant maps;
|
||||
|
||||
/// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows.
|
||||
|
@ -4,13 +4,16 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map)
|
||||
ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map)
|
||||
{
|
||||
ColumnPtr null_map_holder;
|
||||
|
||||
if (key_columns.size() == 1)
|
||||
{
|
||||
auto & column = key_columns[0];
|
||||
if (auto * column_nullable = checkAndGetColumn<ColumnNullable>(*column))
|
||||
{
|
||||
null_map_holder = column_nullable->getNullMapColumnPtr();
|
||||
null_map = &column_nullable->getNullMapData();
|
||||
column = &column_nullable->getNestedColumn();
|
||||
}
|
||||
@ -43,6 +46,8 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul
|
||||
|
||||
null_map = null_map_holder ? &static_cast<const ColumnUInt8 &>(*null_map_holder).getData() : nullptr;
|
||||
}
|
||||
|
||||
return null_map_holder;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -6,8 +6,8 @@ namespace DB
|
||||
|
||||
/** Replace Nullable key_columns to corresponding nested columns.
|
||||
* In 'null_map' return a map of positions where at least one column was NULL.
|
||||
* null_map_holder could take ownership of null_map, if required.
|
||||
* @returns the column that owns the data 'null_map' points to; keep it alive for as long as 'null_map' is used.
|
||||
*/
|
||||
void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map);
|
||||
ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map);
|
||||
|
||||
}
|
||||
|
@ -127,9 +127,8 @@ void Set::setHeader(const Block & block)
|
||||
}
|
||||
|
||||
/// We will insert to the Set only keys, where all components are not NULL.
|
||||
ColumnPtr null_map_holder;
|
||||
ConstNullMapPtr null_map{};
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
|
||||
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
if (fill_set_elements)
|
||||
{
|
||||
@ -168,9 +167,8 @@ bool Set::insertFromBlock(const Block & block)
|
||||
size_t rows = block.rows();
|
||||
|
||||
/// We will insert to the Set only keys, where all components are not NULL.
|
||||
ColumnPtr null_map_holder;
|
||||
ConstNullMapPtr null_map{};
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
|
||||
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
/// Filter to extract distinct values from the block.
|
||||
ColumnUInt8::MutablePtr filter;
|
||||
@ -349,9 +347,8 @@ ColumnPtr Set::execute(const Block & block, bool negative) const
|
||||
}
|
||||
|
||||
/// We will check existence in Set only for keys, where all components are not NULL.
|
||||
ColumnPtr null_map_holder;
|
||||
ConstNullMapPtr null_map{};
|
||||
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
|
||||
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
|
||||
|
||||
executeOrdinary(key_columns, vec_res, negative, null_map);
|
||||
|
||||
|
@ -0,0 +1,22 @@
|
||||
n rj n 1 1
|
||||
n rj n id id
|
||||
n rj n \N \N
|
||||
n fj n 1 1
|
||||
n fj n id id
|
||||
n fj n \N \N
|
||||
n fj n \N \N
|
||||
t rj n \N
|
||||
t rj n 1 1
|
||||
t rj n id id
|
||||
t fj n \N
|
||||
t fj n 1 1
|
||||
t fj n id id
|
||||
n rj t 1 1
|
||||
n rj t id id
|
||||
n fj t 1 1
|
||||
n fj t id id
|
||||
n fj t \N \N
|
||||
0 2
|
||||
1001 2
|
||||
1001 2
|
||||
2002 2
|
28
dbms/tests/queries/0_stateless/00875_join_right_nulls.sql
Normal file
28
dbms/tests/queries/0_stateless/00875_join_right_nulls.sql
Normal file
@ -0,0 +1,28 @@
|
||||
DROP TABLE IF EXISTS t;
|
||||
DROP TABLE IF EXISTS nt;
|
||||
|
||||
CREATE TABLE t (x String) ENGINE = Memory;
|
||||
CREATE TABLE nt (x Nullable(String)) ENGINE = Memory;
|
||||
|
||||
INSERT INTO t (x) VALUES ('id'), ('1');
|
||||
INSERT INTO nt (x) VALUES ('id'), (NULL), ('1');
|
||||
|
||||
SET join_use_nulls = 1;
|
||||
|
||||
SELECT 'n rj n', t1.x, t2.x FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
SELECT 'n fj n', t1.x, t2.x FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
|
||||
SELECT 't rj n', t1.x, t2.x FROM t AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
SELECT 't fj n', t1.x, t2.x FROM t AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
|
||||
SELECT 'n rj t', t1.x, t2.x FROM nt AS t1 RIGHT JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
SELECT 'n fj t', t1.x, t2.x FROM nt AS t1 FULL JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x;
|
||||
|
||||
INSERT INTO nt (x) SELECT NULL as x FROM numbers(1000);
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 INNER JOIN nt AS t2 ON t1.x = t2.x;
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 LEFT JOIN nt AS t2 ON t1.x = t2.x;
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x;
|
||||
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x;
|
||||
|
||||
DROP TABLE t;
|
||||
DROP TABLE nt;
|
Loading…
Reference in New Issue
Block a user