Merge pull request #5859 from 4ertus2/joins

Fix FULL|RIGHT JOIN with nulls in right table's keys
This commit is contained in:
alexey-milovidov 2019-07-05 14:23:35 +03:00 committed by GitHub
commit 8b62c96989
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 213 additions and 42 deletions

View File

@ -1,3 +1,5 @@
#include <any>
#include <common/logger_useful.h>
#include <Columns/ColumnConst.h>
@ -531,9 +533,8 @@ bool Join::insertFromBlock(const Block & block)
}
/// We will insert to the map only keys, where all components are not NULL.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
size_t rows = block.rows();
@ -565,6 +566,17 @@ bool Join::insertFromBlock(const Block & block)
});
}
/// If RIGHT or FULL save blocks with nulls for NonJoinedBlockInputStream
if (isRightOrFull(kind) && null_map)
{
UInt8 has_null = 0;
for (size_t i = 0; !has_null && i < null_map->size(); ++i)
has_null |= (*null_map)[i];
if (has_null)
blocks_nullmaps.emplace_back(stored_block, null_map_holder);
}
return limits.check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED);
}
@ -797,9 +809,8 @@ void Join::joinBlockImpl(
}
/// Keys with NULL value in any column won't join to anything.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
size_t existing_columns = block.columns();
@ -1167,7 +1178,8 @@ class NonJoinedBlockInputStream : public IBlockInputStream
public:
NonJoinedBlockInputStream(const Join & parent_, const Block & left_sample_block, const Names & key_names_left,
const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size_)
: parent(parent_), max_block_size(max_block_size_)
: parent(parent_)
, max_block_size(max_block_size_)
{
/** left_sample_block contains keys and "left" columns.
* result_sample_block - keys, "left" columns, and "right" columns.
@ -1235,12 +1247,7 @@ protected:
{
if (parent.blocks.empty())
return Block();
Block block;
if (!joinDispatch(parent.kind, parent.strictness, parent.maps,
[&](auto, auto strictness, auto & map) { block = createBlock<strictness>(map); }))
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
return block;
return createBlock();
}
private:
@ -1256,7 +1263,8 @@ private:
/// Which key columns need change nullability (right is nullable and left is not or vice versa)
std::vector<bool> key_nullability_changes;
std::unique_ptr<void, std::function<void(void *)>> position; /// type erasure
std::any position;
std::optional<Join::BlockNullmapList::const_iterator> nulls_position;
void makeResultSampleBlock(const Block & left_sample_block, const Block & right_sample_block,
@ -1304,8 +1312,7 @@ private:
}
}
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
Block createBlock(const Maps & maps)
Block createBlock()
{
MutableColumns columns_left = columnsForIndex(result_sample_block, column_indices_left);
MutableColumns columns_keys_and_right = columnsForIndex(result_sample_block, column_indices_keys_and_right);
@ -1315,18 +1322,15 @@ private:
size_t rows_added = 0;
switch (parent.type)
auto fill_callback = [&](auto, auto strictness, auto & map)
{
#define M(TYPE) \
case Join::Type::TYPE: \
rows_added = fillColumns<STRICTNESS>(*maps.TYPE, columns_keys_and_right); \
break;
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
rows_added = fillColumnsFromMap<strictness>(map, columns_keys_and_right);
};
default:
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
}
if (!joinDispatch(parent.kind, parent.strictness, parent.maps, fill_callback))
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
fillNullsFromBlocks(columns_keys_and_right, rows_added);
if (!rows_added)
return {};
@ -1362,25 +1366,44 @@ private:
return columns;
}
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
{
switch (parent.type)
{
#define M(TYPE) \
case Join::Type::TYPE: \
return fillColumns<STRICTNESS>(*maps.TYPE, columns_keys_and_right);
APPLY_FOR_JOIN_VARIANTS(M)
#undef M
default:
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT);
}
__builtin_unreachable();
}
template <ASTTableJoin::Strictness STRICTNESS, typename Map>
size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right)
{
using Mapped = typename Map::mapped_type;
using Iterator = typename Map::const_iterator;
size_t rows_added = 0;
if (!position)
position = decltype(position)(
static_cast<void *>(new typename Map::const_iterator(map.begin())), //-V572
[](void * ptr) { delete reinterpret_cast<typename Map::const_iterator *>(ptr); });
if (!position.has_value())
position = std::make_any<Iterator>(map.begin());
auto & it = *reinterpret_cast<typename Map::const_iterator *>(position.get());
Iterator & it = std::any_cast<Iterator &>(position);
auto end = map.end();
for (; it != end; ++it)
{
if (it->getSecond().getUsed())
const Mapped & mapped = it->getSecond();
if (mapped.getUsed())
continue;
AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->getSecond(), rows_added, columns_keys_and_right);
AdderNonJoined<STRICTNESS, Mapped>::add(mapped, rows_added, columns_keys_and_right);
if (rows_added >= max_block_size)
{
@ -1392,6 +1415,30 @@ private:
return rows_added;
}
/// Appends to 'columns_keys_and_right' every row of the blocks listed in parent.blocks_nullmaps
/// whose null map entry is set, and increases 'rows_added' by the number of rows appended.
/// The position in the list is remembered in 'nulls_position' between calls.
void fillNullsFromBlocks(MutableColumns & columns_keys_and_right, size_t & rows_added)
{
    if (!nulls_position.has_value())
        nulls_position = parent.blocks_nullmaps.begin();

    /// Bind to the stored iterator itself so that advancing it persists for the next call.
    auto & cursor = *nulls_position;
    const auto last = parent.blocks_nullmaps.end();
    const size_t num_columns = columns_keys_and_right.size();

    /// The size limit is tested once per block, so a started block is always consumed completely.
    while (cursor != last && rows_added < max_block_size)
    {
        const Block * stored_block = cursor->first;
        const NullMap & null_flags = static_cast<const ColumnUInt8 &>(*cursor->second).getData();
        const size_t num_rows = null_flags.size();

        for (size_t row = 0; row != num_rows; ++row)
        {
            if (!null_flags[row])
                continue;

            /// Output column 'col' is filled from the stored block's column at the same position.
            for (size_t col = 0; col != num_columns; ++col)
                columns_keys_and_right[col]->insertFrom(*stored_block->getByPosition(col).column, row);

            ++rows_added;
        }

        ++cursor;
    }
}
static std::unordered_set<size_t> getNullabilityChanges(const Block & sample_block_with_keys, const Block & out_block,
const std::vector<size_t> & key_positions,
const std::unordered_map<size_t, size_t> & left_to_right_key_map)

View File

@ -3,6 +3,7 @@
#include <variant>
#include <optional>
#include <shared_mutex>
#include <deque>
#include <Parsers/ASTTablesInSelectQuery.h>
@ -288,10 +289,13 @@ private:
/// Overwrite existing values when encountering the same key again
bool any_take_last_row;
/** Blocks of "right" table.
*/
/// Blocks of "right" table.
BlocksList blocks;
/// Null maps for blocks of the "right" table. Stored only for RIGHT or FULL joins, and only for blocks that contain at least one NULL key.
using BlockNullmapList = std::deque<std::pair<const Block *, ColumnPtr>>;
BlockNullmapList blocks_nullmaps;
MapsVariant maps;
/// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows.

View File

@ -4,13 +4,16 @@
namespace DB
{
void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map)
ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map)
{
ColumnPtr null_map_holder;
if (key_columns.size() == 1)
{
auto & column = key_columns[0];
if (auto * column_nullable = checkAndGetColumn<ColumnNullable>(*column))
{
null_map_holder = column_nullable->getNullMapColumnPtr();
null_map = &column_nullable->getNullMapData();
column = &column_nullable->getNestedColumn();
}
@ -43,6 +46,8 @@ void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & nul
null_map = null_map_holder ? &static_cast<const ColumnUInt8 &>(*null_map_holder).getData() : nullptr;
}
return null_map_holder;
}
}

View File

@ -6,8 +6,8 @@ namespace DB
/** Replace Nullable key_columns to corresponding nested columns.
* In 'null_map' return a map of positions where at least one column was NULL.
* null_map_holder could take ownership of null_map, if required.
* @returns the column that owns the data 'null_map' points to; keep it alive for as long as 'null_map' is used.
*/
void extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ColumnPtr & null_map_holder, ConstNullMapPtr & null_map);
ColumnPtr extractNestedColumnsAndNullMap(ColumnRawPtrs & key_columns, ConstNullMapPtr & null_map);
}

View File

@ -127,9 +127,8 @@ void Set::setHeader(const Block & block)
}
/// We will insert to the Set only keys, where all components are not NULL.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
if (fill_set_elements)
{
@ -168,9 +167,8 @@ bool Set::insertFromBlock(const Block & block)
size_t rows = block.rows();
/// We will insert to the Set only keys, where all components are not NULL.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
/// Filter to extract distinct values from the block.
ColumnUInt8::MutablePtr filter;
@ -349,9 +347,8 @@ ColumnPtr Set::execute(const Block & block, bool negative) const
}
/// We will check existence in Set only for keys, where all components are not NULL.
ColumnPtr null_map_holder;
ConstNullMapPtr null_map{};
extractNestedColumnsAndNullMap(key_columns, null_map_holder, null_map);
ColumnPtr null_map_holder = extractNestedColumnsAndNullMap(key_columns, null_map);
executeOrdinary(key_columns, vec_res, negative, null_map);

View File

@ -21,3 +21,4 @@
12 12
13 13
14 14
\N 8

View File

@ -206,11 +206,21 @@ self right
8 l8 \N 8 l8 \N
9 l9 \N 9 l9 \N
self right nullable
0 \N 4 l5 \N
0 \N 4 l6 \N
0 \N 5 l7 \N
0 \N 8 l8 \N
0 \N 9 l9 \N
1 l1 1 1 l1 1
2 l2 2 2 l2 2
2 l3 3 2 l3 3
3 l4 4 3 l4 4
self right nullable vs not nullable
0 \N 4 l5 \N
0 \N 4 l6 \N
0 \N 5 l7 \N
0 \N 8 l8 \N
0 \N 9 l9 \N
1 l1 1 1 l1 1
2 l2 2 2 l2 2
2 l3 3 2 l2 2
@ -232,6 +242,11 @@ self full
8 l8 \N 8 l8 \N
9 l9 \N 9 l9 \N
self full nullable
0 \N 4 l5 \N
0 \N 4 l6 \N
0 \N 5 l7 \N
0 \N 8 l8 \N
0 \N 9 l9 \N
1 l1 1 1 l1 1
2 l2 2 2 l2 2
2 l3 3 2 l3 3
@ -242,6 +257,11 @@ self full nullable
8 l8 \N 0 \N
9 l9 \N 0 \N
self full nullable vs not nullable
0 \N 4 l5 \N
0 \N 4 l6 \N
0 \N 5 l7 \N
0 \N 8 l8 \N
0 \N 9 l9 \N
1 l1 1 1 l1 1
2 l2 2 2 l2 2
2 l3 3 2 l2 2

View File

@ -9,8 +9,11 @@ foo \N 2 0 Nullable(String) Nullable(String)
foo 2 0 String Nullable(String)
bar bar 1 2 String Nullable(String)
test 0 1 String Nullable(String)
\N 0 1 String Nullable(String)
bar bar 1 2 String Nullable(String)
test 0 1 String Nullable(String)
\N 0 1 String Nullable(String)
foo 2 0 String
bar 1 2 String
test 0 1 String
0 1 String

View File

@ -0,0 +1,46 @@
on
n rj n 1 1
n rj n id id
n rj n \N \N
n fj n 1 1
n fj n id id
n fj n \N \N
n fj n \N \N
t rj n \N
t rj n 1 1
t rj n id id
t fj n \N
t fj n 1 1
t fj n id id
n rj t 1 1
n rj t id id
n fj t 1 1
n fj t id id
n fj t \N \N
using
n rj n 1 1
n rj n id id
n rj n \N \N
n fj n 1 1
n fj n id id
n fj n \N \N
n fj n \N \N
t rj n \N
t rj n 1 1
t rj n id id
t fj n \N
t fj n 1 1
t fj n id id
n rj t 1 1
n rj t id id
n fj t 1 1
n fj t id id
n fj t \N \N
0 2
1001 2
1001 2
2002 2
0 2
1001 2
1001 2
2002 2

View File

@ -0,0 +1,48 @@
-- Test for RIGHT and FULL JOIN when the key column is Nullable and contains NULL values.
-- Drop leftovers first so the script can be re-run.
DROP TABLE IF EXISTS t;
DROP TABLE IF EXISTS nt;
-- t has a non-nullable key column; nt has the same column declared as Nullable(String).
CREATE TABLE t (x String) ENGINE = Memory;
CREATE TABLE nt (x Nullable(String)) ENGINE = Memory;
-- Both tables contain the keys 'id' and '1'; nt additionally contains one NULL key.
INSERT INTO t (x) VALUES ('id'), ('1');
INSERT INTO nt (x) VALUES ('id'), (NULL), ('1');
-- This setting stays in effect for every query below.
SET join_use_nulls = 1;
-- Section 1: join condition written with ON.
-- The leading literal labels each query: left table (n = nt, t = t), join kind (rj = RIGHT JOIN, fj = FULL JOIN), right table.
SELECT 'on';
SELECT 'n rj n', t1.x, t2.x FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
SELECT 'n fj n', t1.x, t2.x FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
SELECT 't rj n', t1.x, t2.x FROM t AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
SELECT 't fj n', t1.x, t2.x FROM t AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x ORDER BY t1.x;
SELECT 'n rj t', t1.x, t2.x FROM nt AS t1 RIGHT JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x;
SELECT 'n fj t', t1.x, t2.x FROM nt AS t1 FULL JOIN t AS t2 ON t1.x = t2.x ORDER BY t1.x;
-- Section 2: the same six table/join combinations, with USING(x) instead of ON.
SELECT 'using';
SELECT 'n rj n', t1.x, t2.x FROM nt AS t1 RIGHT JOIN nt AS t2 USING(x) ORDER BY t1.x;
SELECT 'n fj n', t1.x, t2.x FROM nt AS t1 FULL JOIN nt AS t2 USING(x) ORDER BY t1.x;
SELECT 't rj n', t1.x, t2.x FROM t AS t1 RIGHT JOIN nt AS t2 USING(x) ORDER BY t1.x;
SELECT 't fj n', t1.x, t2.x FROM t AS t1 FULL JOIN nt AS t2 USING(x) ORDER BY t1.x;
SELECT 'n rj t', t1.x, t2.x FROM nt AS t1 RIGHT JOIN t AS t2 USING(x) ORDER BY t1.x;
SELECT 'n fj t', t1.x, t2.x FROM nt AS t1 FULL JOIN t AS t2 USING(x) ORDER BY t1.x;
-- Section 3: add 1000 more NULL keys to nt, then self-join nt with each join kind.
-- sum(isNull(t1.x)) counts result rows where t1.x is NULL; count(t1.x) counts rows where it is not NULL.
INSERT INTO nt (x) SELECT NULL as x FROM numbers(1000);
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 INNER JOIN nt AS t2 ON t1.x = t2.x;
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 LEFT JOIN nt AS t2 ON t1.x = t2.x;
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 RIGHT JOIN nt AS t2 ON t1.x = t2.x;
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 FULL JOIN nt AS t2 ON t1.x = t2.x;
-- The same four aggregate checks with USING(x).
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 INNER JOIN nt AS t2 USING(x);
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 LEFT JOIN nt AS t2 USING(x);
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 RIGHT JOIN nt AS t2 USING(x);
SELECT sum(isNull(t1.x)), count(t1.x) FROM nt AS t1 FULL JOIN nt AS t2 USING(x);
-- Clean up the tables created by this script.
DROP TABLE t;
DROP TABLE nt;