From a7bb8f412fdc9e293cd07ffaa71aa29d0ee48a82 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 12 May 2023 11:51:39 +0000 Subject: [PATCH] Allow ASOF JOIN over nullable right column --- src/Interpreters/HashJoin.cpp | 37 +++++++- src/Interpreters/TableJoin.cpp | 4 - tests/broken_tests.txt | 2 - .../01428_nullable_asof_join.reference | 28 ++++++ .../0_stateless/01428_nullable_asof_join.sql | 38 ++++++-- .../02735_asof_join_right_null.reference | 95 +++++++++++++++++++ .../02735_asof_join_right_null.sql | 32 +++++++ 7 files changed, 217 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02735_asof_join_right_null.reference create mode 100644 tests/queries/0_stateless/02735_asof_join_right_null.sql diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index c9843dca825..c58120c3da9 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -710,15 +710,46 @@ Block HashJoin::prepareRightBlock(const Block & block) const return prepareRightBlock(block, savedBlockSample()); } -bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) +bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits) { if (!data) throw Exception(ErrorCodes::LOGICAL_ERROR, "Join data was released"); /// RowRef::SizeT is uint32_t (not size_t) for hash table Cell memory efficiency. /// It's possible to split bigger blocks and insert them by parts here. But it would be a dead code. 
- if (unlikely(source_block.rows() > std::numeric_limits<RowRef::SizeT>::max())) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Too many rows in right table block for HashJoin: {}", source_block.rows()); + if (unlikely(source_block_.rows() > std::numeric_limits<RowRef::SizeT>::max())) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Too many rows in right table block for HashJoin: {}", source_block_.rows()); + + Block source_block = source_block_; + if (strictness == JoinStrictness::Asof) + { + chassert(kind == JoinKind::Left || kind == JoinKind::Inner); + + // Filter out rows with NULLs in asof key + const auto & asof_key_name = table_join->getOnlyClause().key_names_right.back(); + auto & asof_column = source_block.getByName(asof_key_name); + + if (asof_column.type->isNullable()) + { + /// filter rows with nulls in asof key + if (const auto * asof_const_column = typeid_cast<const ColumnConst *>(asof_column.column.get())) + { + if (asof_const_column->isNullAt(0)) + return false; + } + else + { + const auto & asof_column_nullable = assert_cast<const ColumnNullable &>(*asof_column.column).getNullMapData(); + + NullMap negative_null_map(asof_column_nullable.size()); + for (size_t i = 0; i < asof_column_nullable.size(); ++i) + negative_null_map[i] = !asof_column_nullable[i]; + + for (auto & column : source_block) + column.column = column.column->filter(negative_null_map, -1); + } + } + } size_t rows = source_block.rows(); diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 2d882083f3d..5a23fbd00ff 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -492,10 +492,6 @@ void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const Rig { if (clauses.size() != 1) throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join over multiple keys is not supported"); - - auto asof_key_type = right_types.find(clauses.back().key_names_right.back()); - if (asof_key_type != right_types.end() && asof_key_type->second->isNullable()) - throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, 
"ASOF join over right table Nullable column is not implemented"); } forAllKeys(clauses, [&](const auto & left_key_name, const auto & right_key_name) diff --git a/tests/broken_tests.txt b/tests/broken_tests.txt index 0b4efacba0b..7db123bf467 100644 --- a/tests/broken_tests.txt +++ b/tests/broken_tests.txt @@ -37,8 +37,6 @@ 01268_shard_avgweighted 01270_optimize_skip_unused_shards_low_cardinality 01319_optimize_skip_unused_shards_nesting -01353_low_cardinality_join_types -01428_nullable_asof_join 01455_shard_leaf_max_rows_bytes_to_read 01476_right_full_join_switch 01477_lc_in_merge_join_left_key diff --git a/tests/queries/0_stateless/01428_nullable_asof_join.reference b/tests/queries/0_stateless/01428_nullable_asof_join.reference index f04655fefaa..73825dce725 100644 --- a/tests/queries/0_stateless/01428_nullable_asof_join.reference +++ b/tests/queries/0_stateless/01428_nullable_asof_join.reference @@ -5,6 +5,15 @@ left asof using 0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) 1 \N 1 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) 1 1 2 2 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +0 \N 0 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +1 1 1 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +1 1 2 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) left asof on 0 \N 0 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) 1 \N 1 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) @@ -12,9 +21,28 @@ left asof on 0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) 1 \N 1 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) 1 1 2 2 UInt8 Nullable(UInt8) 
Nullable(UInt8) Nullable(UInt8) +0 \N 0 \N UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +1 1 1 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +1 1 2 0 UInt8 Nullable(UInt8) UInt8 Nullable(UInt8) +0 \N 0 \N UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 Nullable(UInt8) Nullable(UInt8) Nullable(UInt8) asof using 1 1 2 2 UInt8 UInt8 UInt8 UInt8 1 1 2 2 UInt8 UInt8 Nullable(UInt8) UInt8 +1 1 2 2 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 UInt8 UInt8 Nullable(UInt8) +1 1 2 0 UInt8 UInt8 UInt8 Nullable(UInt8) +1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) asof on 1 1 2 2 UInt8 UInt8 UInt8 UInt8 1 1 2 2 UInt8 UInt8 Nullable(UInt8) UInt8 +1 1 1 0 UInt8 UInt8 UInt8 Nullable(UInt8) +1 1 2 0 UInt8 UInt8 UInt8 Nullable(UInt8) +1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 1 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) +1 1 2 0 UInt8 UInt8 Nullable(UInt8) Nullable(UInt8) diff --git a/tests/queries/0_stateless/01428_nullable_asof_join.sql b/tests/queries/0_stateless/01428_nullable_asof_join.sql index e1b00158d68..f07a26edd97 100644 --- a/tests/queries/0_stateless/01428_nullable_asof_join.sql +++ b/tests/queries/0_stateless/01428_nullable_asof_join.sql @@ -18,13 +18,19 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b USING(pk, dt) -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 0; + +SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) +FROM (SELECT toUInt8(number) > 0 as 
pk, toUInt8(number) as dt FROM numbers(3)) a +ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b +USING(pk, dt) +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 1; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b USING(pk, dt) -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; select 'left asof on'; @@ -44,13 +50,13 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b ON a.pk = b.pk AND a.dt >= b.dt -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF LEFT JOIN (SELECT 1 as pk, toNullable(0) as dt) b ON a.dt >= b.dt AND a.pk = b.pk -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; select 'asof using'; @@ -64,19 +70,31 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, 2 as dt) b USING(pk, dt) -ORDER BY a.dt; +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 0; + +SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) +FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a +ASOF JOIN (SELECT 1 as pk, 2 as dt) b +USING(pk, dt) +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 1; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), 
toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b USING(pk, dt) -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 0; + +SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) +FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a +ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b +USING(pk, dt) +ORDER BY a.dt SETTINGS allow_experimental_analyzer = 1; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b USING(pk, dt) -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; select 'asof on'; @@ -96,19 +114,19 @@ SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(ma FROM (SELECT toUInt8(number) > 0 as pk, toUInt8(number) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b ON a.pk = b.pk AND a.dt >= b.dt -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b ON a.pk = b.pk AND a.dt >= b.dt -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; SELECT a.pk, b.pk, a.dt, b.dt, toTypeName(a.pk), toTypeName(b.pk), toTypeName(materialize(a.dt)), toTypeName(materialize(b.dt)) FROM (SELECT toUInt8(number) > 0 as pk, toNullable(toUInt8(number)) as dt FROM numbers(3)) a ASOF JOIN (SELECT 1 as pk, toNullable(0) as dt) b ON a.dt >= b.dt AND a.pk = b.pk -ORDER BY a.dt; -- { serverError 48 } +ORDER BY a.dt; 
SELECT * FROM (SELECT NULL AS y, 1 AS x, '2020-01-01 10:10:10' :: DateTime64 AS t) AS t1 diff --git a/tests/queries/0_stateless/02735_asof_join_right_null.reference b/tests/queries/0_stateless/02735_asof_join_right_null.reference new file mode 100644 index 00000000000..d4332556cb5 --- /dev/null +++ b/tests/queries/0_stateless/02735_asof_join_right_null.reference @@ -0,0 +1,95 @@ +-- { echoOn } +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 1 +1 2 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +1 2 1 1 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; +1 1 1 1 +1 2 1 2 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 2 +1 2 0 \N +1 3 0 \N +1 4 0 \N +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 1 +1 2 1 2 +1 3 0 \N +1 4 0 \N +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +1 -1 0 \N +1 0 0 \N +1 1 0 \N +1 2 1 1 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; +1 -1 0 \N +1 0 0 \N +1 1 1 1 +1 2 1 2 +1 3 1 2 +1 4 1 2 +SET join_use_nulls = 1; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 1 +1 2 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +1 2 1 1 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; +1 1 1 1 +1 2 1 2 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 2 +1 2 \N \N +1 3 \N \N +1 4 \N 
\N +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +1 -1 1 1 +1 0 1 1 +1 1 1 1 +1 2 1 2 +1 3 \N \N +1 4 \N \N +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +1 -1 \N \N +1 0 \N \N +1 1 \N \N +1 2 1 1 +1 3 1 2 +1 4 1 2 +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; +1 -1 \N \N +1 0 \N \N +1 1 1 1 +1 2 1 2 +1 3 1 2 +1 4 1 2 +DROP TABLE t1; diff --git a/tests/queries/0_stateless/02735_asof_join_right_null.sql b/tests/queries/0_stateless/02735_asof_join_right_null.sql new file mode 100644 index 00000000000..997d33a0570 --- /dev/null +++ b/tests/queries/0_stateless/02735_asof_join_right_null.sql @@ -0,0 +1,32 @@ + +CREATE TABLE t1 (a Int, b Int) ENGINE = Memory; +INSERT INTO t1 VALUES (1, -1), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4); + +CREATE TABLE t2 (a Int, b Nullable(Int)) ENGINE = Memory; +INSERT INTO t2 VALUES (1, 1), (1, NULL), (1, 2); + +-- { echoOn } +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; + +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; + +SET join_use_nulls = 1; + +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b < t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; + +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a 
= t2.a AND t1.b < t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b <= t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b > t2.b ORDER BY t1.b; +SELECT * FROM t1 ASOF LEFT JOIN t2 ON t1.a = t2.a AND t1.b >= t2.b ORDER BY t1.b; + +DROP TABLE t1; +