join right table nulls

This commit is contained in:
chertus 2019-07-03 22:06:34 +03:00
parent 0a2c3aeaec
commit 73d6784c59
8 changed files with 168 additions and 44 deletions

View File

@ -0,0 +1,24 @@
#pragma once
#include <functional>
#include <memory>
#include <utility>
namespace DB
{
/** Minimal type-erasure helper: owns an object of arbitrary type behind a `void *`.
  * The deleter stored inside the pointer is instantiated for the real type, so the object
  * is destroyed correctly. No type information is kept for access: the caller must pass
  * to get() the same T that was passed to create().
  */
struct ErasedType
{
    /// Owning pointer to an object whose static type has been erased.
    using Ptr = std::unique_ptr<void, std::function<void(void *)>>;

    /// Construct a T from `args` and return it as a type-erased owning pointer.
    /// Arguments are perfectly forwarded, so rvalues are moved and move-only arguments are accepted.
    template <typename T, typename... Args>
    static Ptr create(Args &&... args)
    {
        return Ptr(
            static_cast<void *>(new T(std::forward<Args>(args)...)),
            [](void * ptr) { delete static_cast<T *>(ptr); });
    }

    /// Access the stored object as T. `ptr` must be non-empty and must have been made by create<T>().
    template <typename T>
    static T & get(Ptr & ptr)
    {
        return *static_cast<T *>(ptr.get());
    }
};
}

View File

@ -16,6 +16,7 @@
#include <Core/ColumnNumbers.h>
#include <Common/typeid_cast.h>
#include <Common/ErasedType.h>
#include <DataTypes/DataTypeLowCardinality.h>
@ -531,9 +532,8 @@ bool Join::insertFromBlock(const Block & block)
}
/// We will insert to the map only keys, where all components are not NULL.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
size_t rows = block.rows();
@ -565,6 +565,10 @@ bool Join::insertFromBlock(const Block & block)
});
}
/// If RIGHT or FULL save blocks with nulls for NonJoinedBlockInputStream
if (isRightOrFull(kind) && null_map)
blocks_nullmaps.emplace_back(stored_block, null_map_holder);
return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
}
@ -797,9 +801,8 @@ void Join::joinBlockImpl(
}
/// Keys with NULL value in any column won't join to anything.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
size_t existing_columns = block.columns();
@ -1167,7 +1170,9 @@ class NonJoinedBlockInputStream : public IBlockInputStream
public:
NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left,
const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size_)
: parent(parent_), max_block_size(max_block_size_)
: parent(parent_)
, max_block_size(max_block_size_)
, nulls_it(dirtyIterator())
{
/** left_sample_block contains keys and "left" columns.
* result_sample_block - keys, "left" columns, and "right" columns.
@ -1235,12 +1240,7 @@ protected:
{
if (parent.blocks.empty())
return Block();
Block block;
if (!joinDispatch(parent.kind, parent.strictness, parent.maps,
[&](auto, auto strictness, auto & map) { block = createBlock<strictness>(map); }))
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
return block;
return createBlock();
}
private:
@ -1256,8 +1256,14 @@ private:
/// Which key columns need change nullability (right is nullable and left is not or vice versa)
std::vector<bool> key_nullability_changes;
std::unique_ptr<void, std::function<void(void *)>> position; /// type erasure
ErasedType::Ptr position;
Join::BlockNullmapList::const_iterator nulls_it;
/// Returns the past-the-end iterator of a function-local, always-empty static list.
/// The value is the same on every call, so it can serve as a "not positioned yet" marker for nulls_it.
static Join::BlockNullmapList::const_iterator dirtyIterator()
{
    static const Join::BlockNullmapList placeholder_list{};
    return placeholder_list.cend();
}
void makeResultSampleBlock(const Block & left_sample_block, const Block & right_sample_block,
const NamesAndTypesList & columns_added_by_join,
@ -1304,8 +1310,7 @@ private:
}
}
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
Block createBlock(const Maps & maps)
Block createBlock()
{
MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left);
MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right);
@ -1315,25 +1320,22 @@ private:
size_t rows_added = 0;
switch (parent.type)
auto fill_callback = [&](auto, auto strictness, auto & map)
{
#define M(TYPE) \
case Join::Type::TYPE: \
rows_added = fillColumns<STRICTNESS>(*maps.TYPE, columns_keys_and_right); \
break;
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
rows_added = fillColumnsFromMap<strictness>(map, columns_keys_and_right);
};
default:
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
}
if (!rows_added)
return {};
if (!joinDispatch(parent.kind, parent.strictness, parent.maps, fill_callback))
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
/// Revert columns nullability
changeNullability(columns_keys_and_right, key_nullability_changes);
fillNullsFromBlocks(columns_keys_and_right, rows_added);
if (!rows_added)
return {};
Block res = result_sample_block.cloneEmpty();
/// @note it's possible to make ColumnConst here and materialize it later
@ -1362,25 +1364,44 @@ private:
return columns;
}
/// Dispatches on parent.type and forwards to fillColumns() with the member of `maps`
/// that has the same name as the matching Join::Type enumerator.
/// Returns the value returned by fillColumns().
/// Throws Exception with UNKNOWN_SET_DATA_VARIANT if parent.type matches none of the generated cases.
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
{
switch (parent.type)
{
/// One `case` per variant; each case returns directly, so no `break` is needed.
/// NOTE(review): APPLY_FOR_JOIN_VARIANTS is defined outside this view - presumably it expands M once per Join::Type variant.
#define M(TYPE) \
case Join::Type::TYPE: \
return fillColumns<STRICTNESS>(*maps.TYPE, columns_keys_and_right);
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
default:
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
}
/// Every visible branch of the switch returns or throws, so control is not expected to reach this point.
__builtin_unreachable();
}
template <ASTTableJoin::Strictness STRICTNESS, typename Map>
size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right)
{
using Mapped = typename Map::mapped_type;
using Iterator = typename Map::const_iterator;
size_t rows_added = 0;
if (!position)
position = decltype(position)(
static_cast<void *>(new typename Map::const_iterator(map.begin())), //-V572
[](void * ptr) { delete reinterpret_cast<typename Map::const_iterator *>(ptr); });
position = ErasedType::create<Iterator>(map.begin());
auto & it = *reinterpret_cast<typename Map::const_iterator *>(position.get());
Iterator & it = ErasedType::get<Iterator>(position);
auto end = map.end();
for (; it != end; ++it)
{
if (it->getSecond().getUsed())
const Mapped & mapped = it->getSecond();
if (mapped.getUsed())
continue;
AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->getSecond(), rows_added, columns_keys_and_right);
AdderNonJoined<STRICTNESS, Mapped>::add(mapped, rows_added, columns_keys_and_right);
if (rows_added >= max_block_size)
{
@ -1392,6 +1413,30 @@ private:
return rows_added;
}
/// Appends to columns_keys_and_right the rows of the saved right-table blocks whose null map entry is set,
/// incrementing rows_added for each appended row.
/// Progress is kept in nulls_it, so consecutive calls continue from the block where the previous call stopped.
/// The max_block_size limit is checked only between blocks: a block that has been started is processed to its end.
void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added)
{
    /// The marker value means iteration has not started yet: position at the first saved block.
    if (nulls_it == dirtyIterator())
        nulls_it = parent.blocks_nullmaps.begin();

    const auto nullmaps_end = parent.blocks_nullmaps.end();
    const size_t num_columns = columns_keys_and_right.size();

    while (nulls_it != nullmaps_end && rows_added < max_block_size)
    {
        const Block * stored_block = nulls_it->first;
        const NullMap & key_is_null = static_cast<const ColumnUInt8 &>(*nulls_it->second).getData();
        const size_t num_rows = key_is_null.size();

        for (size_t row = 0; row != num_rows; ++row)
        {
            if (!key_is_null[row])
                continue;

            /// Destination column `col` is filled from the column at the same position in the stored block.
            for (size_t col = 0; col != num_columns; ++col)
                columns_keys_and_right[col]->insertFrom(*stored_block->getByPosition(col).column, row);
            ++rows_added;
        }

        ++nulls_it;
    }
}
static std::unordered_set<size_t> getNullabilityChanges(const Block & sample_block_with_keys, const Block & out_block,
const std::vector<size_t> & key_positions,
const std::unordered_map<size_t, size_t> & left_to_right_key_map)

View File

@ -288,10 +288,13 @@ private:
/// Overwrite existing values when encountering the same key again
bool any_take_last_row;
/** Blocks of "right" table.
*/
/// Blocks of "right" table.
BlocksList blocks;
/// Nullmaps for blocks of "right" table (if needed)
using BlockNullmapList = std::list<std::pair<const Block *, ColumnPtr>>;
BlockNullmapList blocks_nullmaps;
MapsVariant maps;
/// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows.

View File

@ -4,13 +4,16 @@
namespace DB
{
void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map)
ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map)
{
ColumnPtr null_map_holder;
if (key_columns.size() == 1)
{
auto & column = key_columns[0];
if (auto * column_nullable = checkAndGetColumn<ColumnNullable>(*column))
{
null_map_holder = column_nullable->getNullMapColumnPtr();
null_map = &column_nullable->getNullMapData();
column = &column_nullable->getNestedColumn();
}
@ -43,6 +46,8 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul
null_map = null_map_holder ? &static_cast<const ColumnUInt8 &>(*null_map_holder).getData() : nullptr;
}
return null_map_holder;
}
}

View File

@ -6,8 +6,8 @@ namespace DB
/** Replace Nullable key_columns to corresponding nested columns.
* In 'null_map' return a map of positions where at least one column was NULL.
* null_map_holder could take ownership of null_map, if required.
* @returns the column that owns the data null_map points to.
*/
void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map);
ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map);
}

View File

@ -127,9 +127,8 @@ void Set::setHeader(const Block & block)
}
/// We will insert to the Set only keys, where all components are not NULL.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
if (fill_set_elements)
{
@ -168,9 +167,8 @@ bool Set::insertFromBlock(const Block & block)
size_t rows = block.rows();
/// We will insert to the Set only keys, where all components are not NULL.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
/// Filter to extract distinct values from the block.
ColumnUInt8::MutablePtr filter;
@ -349,9 +347,8 @@ ColumnPtr Set::execute(const Block & block, bool negative) const
}
/// We will check existence in Set only for keys, where all components are not NULL.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
executeOrdinary(key_columns, vec_res, negative, null_map);

View File

@ -0,0 +1,22 @@
n rj n 1 1
n rj n id id
n rj n \N \N
n fj n 1 1
n fj n id id
n fj n \N \N
n fj n \N \N
t rj n \N
t rj n 1 1
t rj n id id
t fj n \N
t fj n 1 1
t fj n id id
n rj t 1 1
n rj t id id
n fj t 1 1
n fj t id id
n fj t \N \N
0 2
1001 2
1001 2
2002 2

View File

@ -0,0 +1,28 @@
-- Start from a clean state.
DROP TABLE IF EXISTS t;
DROP TABLE IF EXISTS nt;
-- `t` has a plain String key column; `nt` has a Nullable(String) key column.
CREATE TABLE t (x String) ENGINE = Memory;
CREATE TABLE nt (x Nullable(String)) ENGINE = Memory;
-- Both tables hold the keys 'id' and '1'; only `nt` additionally holds a NULL key.
INSERT INTO t (x) VALUES ('id'), ('1');
INSERT INTO nt (x) VALUES ('id'), (NULL), ('1');
SET join_use_nulls = 1;
-- RIGHT and FULL joins for each pairing of key nullability.
-- The leading string literal labels the case: left table, join kind (rj = RIGHT JOIN, fj = FULL JOIN), right table,
-- where `n` stands for `nt` and `t` for `t`.
SELECT 'n rj n', t1.x, t2.x FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
SELECT 'n fj n', t1.x, t2.x FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
SELECT 't rj n', t1.x, t2.x FROM t AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
SELECT 't fj n', t1.x, t2.x FROM t AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
SELECT 'n rj t', t1.x, t2.x FROM nt AS t1 RIGHT JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x;
SELECT 'n fj t', t1.x, t2.x FROM nt AS t1 FULL JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x;
-- Add 1000 more NULL keys to `nt` (1001 NULL rows in total), then aggregate over each join kind.
INSERT INTO nt (x) SELECT NULL as x FROM numbers(1000);
-- First column: number of result rows whose left key is NULL; second column: count(t1.x) over the left key.
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 INNER JOIN nt AS t2 ON t1.x = t2.x;
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 LEFT JOIN nt AS t2 ON t1.x = t2.x;
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x;
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x;
-- Clean up the fixtures.
DROP TABLE t;
DROP TABLE nt;