Allow ASOF JOIN over nullable right column

This commit is contained in:
vdimir 2023-05-12 11:51:39 +00:00
parent 6061f3b90e
commit a7bb8f412f
7 changed files with 217 additions and 19 deletions

View File

@ -710,15 +710,46 @@ Block HashJoin::prepareRightBlock(const Block & block) const
return prepareRightBlock(block, savedBlockSample());
}
bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits)
bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits)
{
if (!data)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Join data was released");
/// RowRef::SizeT is uint32_t (not size_t) for hash table Cell memory efficiency.
/// It's possible to split bigger blocks and insert them by parts here. But it would be a dead code.
if (unlikely(source_block.rows() > std::numeric_limits<RowRef::SizeT>::max()))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Too many rows in right table block for HashJoin: {}", source_block.rows());
if (unlikely(source_block_.rows() > std::numeric_limits<RowRef::SizeT>::max()))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Too many rows in right table block for HashJoin: {}", source_block_.rows());
Block source_block = source_block_;
if (strictness == JoinStrictness::Asof)
{
chassert(kind == JoinKind::Left || kind == JoinKind::Inner);
// Filter out rows with NULLs in asof key
const auto & asof_key_name = table_join->getOnlyClause().key_names_right.back();
auto & asof_column = source_block.getByName(asof_key_name);
if (asof_column.type->isNullable())
{
/// filter rows with nulls in asof key
if (const auto * asof_const_column = typeid_cast<const ColumnConst *>(asof_column.column.get()))
{
if (asof_const_column->isNullAt(0))
return false;
}
else
{
const auto & asof_column_nullable = assert_cast<const ColumnNullable &>(*asof_column.column).getNullMapData();
NullMap negative_null_map(asof_column_nullable.size());
for (size_t i = 0; i < asof_column_nullable.size(); ++i)
negative_null_map[i] = !asof_column_nullable[i];
for (auto & column : source_block)
column.column = column.column->filter(negative_null_map, -1);
}
}
}
size_t rows = source_block.rows();

View File

@ -492,10 +492,6 @@ void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const Rig
{
if (clauses.size() != 1)
throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join over multiple keys is not supported");
auto asof_key_type = right_types.find(clauses.back().key_names_right.back());
if (asof_key_type != right_types.end() && asof_key_type->second->isNullable())
throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join over right table Nullable column is not implemented");
}
forAllKeys(clauses, [&](const auto & left_key_name, const auto & right_key_name)

View File

@ -37,8 +37,6 @@
01268_shard_avgweighted
01270_optimize_skip_unused_shards_low_cardinality
01319_optimize_skip_unused_shards_nesting
01353_low_cardinality_join_types
01428_nullable_asof_join
01455_shard_leaf_max_rows_bytes_to_read
01476_right_full_join_switch
01477_lc_in_merge_join_left_key

View File

@ -5,6 +5,15 @@ left asof using
0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
1 \N 1 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
1 1 2 2 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
0 \N 0 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8)
1 1 1 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8)
1 1 2 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8)
0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
1 1 1 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
1 1 2 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
1 1 1 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
1 1 2 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
left asof on
0 \N 0 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8)
1 \N 1 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8)
@ -12,9 +21,28 @@ left asof on
0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
1 \N 1 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
1 1 2 2 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
0 \N 0 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8)
1 1 1 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8)
1 1 2 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8)
0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
1 1 1 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
1 1 2 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8)
asof using
1 1 2 2 UInt8 UInt8 UInt8 UInt8
1 1 2 2 UInt8 UInt8 Nullable(UInt8) UInt8
1 1 2 2 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8)
1 1 1 0 UInt8 UInt8 UInt8 Nullable(UInt8)
1 1 2 0 UInt8 UInt8 UInt8 Nullable(UInt8)
1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8)
1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8)
1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8)
1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8)
asof on
1 1 2 2 UInt8 UInt8 UInt8 UInt8
1 1 2 2 UInt8 UInt8 Nullable(UInt8) UInt8
1 1 1 0 UInt8 UInt8 UInt8 Nullable(UInt8)
1 1 2 0 UInt8 UInt8 UInt8 Nullable(UInt8)
1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8)
1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8)
1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8)
1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8)

View File

@ -18,13 +18,19 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma
FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a
ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b
USING(pk, dt)
ORDER BY a.dt; -- { serverError 48 }
ORDER BY a.dt SETTINGS allow_experimental_analyzer = 0;
SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt))
FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a
ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b
USING(pk, dt)
ORDER BY a.dt SETTINGS allow_experimental_analyzer = 1;
SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt))
FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a
ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b
USING(pk, dt)
ORDER BY a.dt; -- { serverError 48 }
ORDER BY a.dt;
select 'left asof on';
@ -44,13 +50,13 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma
FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a
ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b
ON a.pk = b.pk AND a.dt >= b.dt
ORDER BY a.dt; -- { serverError 48 }
ORDER BY a.dt;
SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt))
FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a
ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b
ON a.dt >= b.dt AND a.pk = b.pk
ORDER BY a.dt; -- { serverError 48 }
ORDER BY a.dt;
select 'asof using';
@ -64,19 +70,31 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma
FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a
ASOF JOIN (SELECT 1 as pk, 2 as dt) b
USING(pk, dt)
ORDER BY a.dt;
ORDER BY a.dt SETTINGS allow_experimental_analyzer = 0;
SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt))
FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a
ASOF JOIN (SELECT 1 as pk, 2 as dt) b
USING(pk, dt)
ORDER BY a.dt SETTINGS allow_experimental_analyzer = 1;
SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt))
FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a
ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b
USING(pk, dt)
ORDER BY a.dt; -- { serverError 48 }
ORDER BY a.dt SETTINGS allow_experimental_analyzer = 0;
SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt))
FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a
ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b
USING(pk, dt)
ORDER BY a.dt SETTINGS allow_experimental_analyzer = 1;
SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt))
FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a
ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b
USING(pk, dt)
ORDER BY a.dt; -- { serverError 48 }
ORDER BY a.dt;
select 'asof on';
@ -96,19 +114,19 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma
FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a
ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b
ON a.pk = b.pk AND a.dt >= b.dt
ORDER BY a.dt; -- { serverError 48 }
ORDER BY a.dt;
SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt))
FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a
ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b
ON a.pk = b.pk AND a.dt >= b.dt
ORDER BY a.dt; -- { serverError 48 }
ORDER BY a.dt;
SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt))
FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a
ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b
ON a.dt >= b.dt AND a.pk = b.pk
ORDER BY a.dt; -- { serverError 48 }
ORDER BY a.dt;
SELECT *
FROM (SELECT NULL AS y, 1 AS x, '2020-01-01 10:10:10' :: DateTime64 AS t) AS t1

View File

@ -0,0 +1,95 @@
-- { echoOn }
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b;
1 -1 1 1
1 0 1 1
1 1 1 2
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b;
1 -1 1 1
1 0 1 1
1 1 1 1
1 2 1 2
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b;
1 2 1 1
1 3 1 2
1 4 1 2
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b;
1 1 1 1
1 2 1 2
1 3 1 2
1 4 1 2
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b;
1 -1 1 1
1 0 1 1
1 1 1 2
1 2 0 \N
1 3 0 \N
1 4 0 \N
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b;
1 -1 1 1
1 0 1 1
1 1 1 1
1 2 1 2
1 3 0 \N
1 4 0 \N
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b;
1 -1 0 \N
1 0 0 \N
1 1 0 \N
1 2 1 1
1 3 1 2
1 4 1 2
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b;
1 -1 0 \N
1 0 0 \N
1 1 1 1
1 2 1 2
1 3 1 2
1 4 1 2
SET join_use_nulls = 1;
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b;
1 -1 1 1
1 0 1 1
1 1 1 2
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b;
1 -1 1 1
1 0 1 1
1 1 1 1
1 2 1 2
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b;
1 2 1 1
1 3 1 2
1 4 1 2
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b;
1 1 1 1
1 2 1 2
1 3 1 2
1 4 1 2
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b;
1 -1 1 1
1 0 1 1
1 1 1 2
1 2 \N \N
1 3 \N \N
1 4 \N \N
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b;
1 -1 1 1
1 0 1 1
1 1 1 1
1 2 1 2
1 3 \N \N
1 4 \N \N
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b;
1 -1 \N \N
1 0 \N \N
1 1 \N \N
1 2 1 1
1 3 1 2
1 4 1 2
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b;
1 -1 \N \N
1 0 \N \N
1 1 1 1
1 2 1 2
1 3 1 2
1 4 1 2
DROP TABLE t1;

View File

@ -0,0 +1,32 @@
CREATE TABLE t1 (a Int, b Int) ENGINE = Memory;
INSERT INTO t1 VALUES (1, -1), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4);
CREATE TABLE t2 (a Int, b Nullable(Int)) ENGINE = Memory;
INSERT INTO t2 VALUES (1, 1), (1, NULL), (1, 2);
-- { echoOn }
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b;
SET join_use_nulls = 1;
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b;
SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b;
DROP TABLE t1;