Fix ALL LEFT partial merge join on block borders

This commit is contained in:
chertus 2019-09-24 21:21:57 +03:00
parent 80d902a401
commit 0467229eff
5 changed files with 595 additions and 2 deletions

View File

@ -476,10 +476,14 @@ void MergeJoin::leftJoin(MergeJoinCursor & left_cursor, const Block & left_block
while (!left_cursor.atEnd() && !right_cursor.atEnd())
{
size_t left_position = left_cursor.position(); /// save inequal position
/// A non-zero left_key_tail means the last left key had an equal right key in the previous leftJoin() call.
/// Do not join it twice: join it only if it equals the first right key of the current leftJoin() call, and skip it otherwise.
size_t left_unequal_position = left_cursor.position() + left_key_tail;
left_key_tail = 0;
Range range = left_cursor.getNextEqualRange(right_cursor);
joinInequalsLeft(left_block, left_columns, right_columns, left_position, range.left_start, is_all);
joinInequalsLeft(left_block, left_columns, right_columns, left_unequal_position, range.left_start, is_all);
if (range.empty())
break;

View File

@ -0,0 +1,188 @@
any left
0 0 0
1 10 0
2 20 2
3 30 0
4 40 4
-
0 0 0
1 10 0
2 20 0
3 30 0
4 40 0
-
0 0 0
1 10 0
2 20 2
3 30 0
4 40 4
-
0 0 0
1 10 0
2 20 0
3 30 0
4 40 0
all left
0 0 0 0
1 10 0 0
2 20 2 21
2 20 2 22
3 30 0 0
4 40 4 41
4 40 4 42
-
0 0 0 0
1 10 0 0
2 20 0 0
3 30 0 0
4 40 0 0
-
0 0 0 0
1 10 0 0
2 20 0 0
3 30 0 0
4 40 0 0
-
0 0 0 0
1 10 0 0
2 20 2 21
2 20 2 22
3 30 0 0
4 40 4 41
4 40 4 42
-
0 0 0 0
1 10 0 0
2 20 2 21
2 20 2 22
3 30 0 0
4 40 4 41
4 40 4 42
any inner
0 0 0
2 20 2
4 40 4
-
0 0 0
-
0 0 0
2 20 2
4 40 4
-
0 0 0
all inner
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42
-
0 0 0 0
-
0 0 0 0
-
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42
-
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42
any left
0 0 0
1 10 \N
2 20 2
3 30 \N
4 40 4
-
0 0 0
1 10 \N
2 20 \N
3 30 \N
4 40 \N
-
0 0 0
1 10 \N
2 20 2
3 30 \N
4 40 4
-
0 0 0
1 10 \N
2 20 \N
3 30 \N
4 40 \N
all left
0 0 0 0
1 10 \N \N
2 20 2 21
2 20 2 22
3 30 \N \N
4 40 4 41
4 40 4 42
-
0 0 0 0
1 10 \N \N
2 20 \N \N
3 30 \N \N
4 40 \N \N
-
0 0 0 0
1 10 \N \N
2 20 \N \N
3 30 \N \N
4 40 \N \N
-
0 0 0 0
1 10 \N \N
2 20 2 21
2 20 2 22
3 30 \N \N
4 40 4 41
4 40 4 42
-
0 0 0 0
1 10 \N \N
2 20 2 21
2 20 2 22
3 30 \N \N
4 40 4 41
4 40 4 42
any inner
0 0 0
2 20 2
4 40 4
-
0 0 0
-
0 0 0
2 20 2
4 40 4
-
0 0 0
all inner
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42
-
0 0 0 0
-
0 0 0 0
-
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42
-
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42

View File

@ -0,0 +1,106 @@
-- Test setup: recreate three MergeTree tables with identical schemas, ordered by (x, y).
-- NOTE(review): t0 is created here and dropped at the end, but no visible statement
-- inserts into it or reads from it.
DROP TABLE IF EXISTS t0;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t0 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y);
CREATE TABLE t1 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y);
CREATE TABLE t2 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y);
-- Enable the partial merge join algorithm for every query below.
SET partial_merge_join = 1;
-- Presumably caps right-side blocks at one row so that every right key sits on a block
-- border -- confirm against the setting's documentation.
SET partial_merge_join_rows_in_right_blocks = 1;
SET any_join_distinct_right_table_keys = 1;
-- Data: t1 (left side) gets x = 0..4 with y = 10 * x, loaded by three separate INSERTs and
-- not in key order within the last one. t2 (right side) has two rows each for x = 2 and
-- x = 4, one row for x = 0, one row for x = 5 (absent from t1), and no rows for x = 1 or x = 3.
INSERT INTO t1 (x, y) VALUES (0, 0);
INSERT INTO t1 (x, y) VALUES (1, 10) (2, 20);
INSERT INTO t1 (x, y) VALUES (4, 40) (3, 30);
INSERT INTO t2 (x, y) VALUES (4, 41) (2, 21) (2, 22);
INSERT INTO t2 (x, y) VALUES (0, 0) (5, 50) (4, 42);
SET join_use_nulls = 0;
-- First pass over all join variants, with join_use_nulls = 0.
-- Each SELECT of a string literal prints a section label or a '-' separator between result sets.
-- NOTE(review): the expected output listed with this test shows 0 in the right-side columns
-- of unmatched left rows for this pass -- confirm against the .reference file.

-- ANY LEFT JOIN: USING (x), then USING (x,y); the same pair of queries is issued twice.
SELECT 'any left';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x;
-- LEFT JOIN without ANY: key on x, key on y, key on both columns, then two variants
-- where the second key is an expression (on the left side, then on both sides).
SELECT 'all left';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y;
-- ANY INNER JOIN: same USING (x) / USING (x,y) pair, issued twice.
SELECT 'any inner';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x;
-- INNER JOIN without ANY: the same five key combinations as the 'all left' section.
SELECT 'all inner';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y;
SET join_use_nulls = 1;
-- Second pass: the same queries in the same order, now with join_use_nulls = 1.
-- NOTE(review): the expected output listed with this test shows \N instead of 0 in the
-- right-side columns of unmatched left rows for this pass -- confirm against the .reference file.

-- ANY LEFT JOIN: USING (x), then USING (x,y); the same pair of queries is issued twice.
SELECT 'any left';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x;
-- LEFT JOIN without ANY: key on x, key on y, key on both columns, then two variants
-- where the second key is an expression (on the left side, then on both sides).
SELECT 'all left';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y;
-- ANY INNER JOIN: same USING (x) / USING (x,y) pair, issued twice.
SELECT 'any inner';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x;
-- INNER JOIN without ANY: the same five key combinations as the 'all left' section.
SELECT 'all inner';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y;
-- Teardown: remove the three tables created at the top of the script.
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t2;

View File

@ -0,0 +1,188 @@
any left
0 0 0
1 10 0
2 20 2
3 30 0
4 40 4
-
0 0 0
1 10 0
2 20 0
3 30 0
4 40 0
-
0 0 0
1 10 0
2 20 2
3 30 0
4 40 4
-
0 0 0
1 10 0
2 20 0
3 30 0
4 40 0
all left
0 0 0 0
1 10 0 0
2 20 2 21
2 20 2 22
3 30 0 0
4 40 4 41
4 40 4 42
-
0 0 0 0
1 10 0 0
2 20 0 0
3 30 0 0
4 40 0 0
-
0 0 0 0
1 10 0 0
2 20 0 0
3 30 0 0
4 40 0 0
-
0 0 0 0
1 10 0 0
2 20 2 21
2 20 2 22
3 30 0 0
4 40 4 41
4 40 4 42
-
0 0 0 0
1 10 0 0
2 20 2 21
2 20 2 22
3 30 0 0
4 40 4 41
4 40 4 42
any inner
0 0 0
2 20 2
4 40 4
-
0 0 0
-
0 0 0
2 20 2
4 40 4
-
0 0 0
all inner
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42
-
0 0 0 0
-
0 0 0 0
-
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42
-
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42
any left
0 0 0
1 10 \N
2 20 2
3 30 \N
4 40 4
-
0 0 0
1 10 \N
2 20 \N
3 30 \N
4 40 \N
-
0 0 0
1 10 \N
2 20 2
3 30 \N
4 40 4
-
0 0 0
1 10 \N
2 20 \N
3 30 \N
4 40 \N
all left
0 0 0 0
1 10 \N \N
2 20 2 21
2 20 2 22
3 30 \N \N
4 40 4 41
4 40 4 42
-
0 0 0 0
1 10 \N \N
2 20 \N \N
3 30 \N \N
4 40 \N \N
-
0 0 0 0
1 10 \N \N
2 20 \N \N
3 30 \N \N
4 40 \N \N
-
0 0 0 0
1 10 \N \N
2 20 2 21
2 20 2 22
3 30 \N \N
4 40 4 41
4 40 4 42
-
0 0 0 0
1 10 \N \N
2 20 2 21
2 20 2 22
3 30 \N \N
4 40 4 41
4 40 4 42
any inner
0 0 0
2 20 2
4 40 4
-
0 0 0
-
0 0 0
2 20 2
4 40 4
-
0 0 0
all inner
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42
-
0 0 0 0
-
0 0 0 0
-
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42
-
0 0 0 0
2 20 2 21
2 20 2 22
4 40 4 41
4 40 4 42

View File

@ -0,0 +1,107 @@
-- Test setup: recreate three MergeTree tables with identical schemas, ordered by (x, y).
-- NOTE(review): t0 is created here and dropped at the end, but no visible statement
-- inserts into it or reads from it.
DROP TABLE IF EXISTS t0;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t0 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y);
CREATE TABLE t1 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y);
CREATE TABLE t2 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y);
-- Enable the partial merge join algorithm for every query below.
SET partial_merge_join = 1;
-- This variant additionally switches on partial_merge_join_optimisations.
SET partial_merge_join_optimisations = 1;
-- Presumably caps right-side blocks at two rows, so runs of equal keys can straddle a
-- block border -- confirm against the setting's documentation.
SET partial_merge_join_rows_in_right_blocks = 2;
SET any_join_distinct_right_table_keys = 1;
-- Data: t1 (left side) gets x = 0..4 with y = 10 * x, loaded by three separate INSERTs and
-- not in key order within the last one. t2 (right side) has two rows each for x = 2 and
-- x = 4, one row for x = 0, one row for x = 5 (absent from t1), and no rows for x = 1 or x = 3.
INSERT INTO t1 (x, y) VALUES (0, 0);
INSERT INTO t1 (x, y) VALUES (1, 10) (2, 20);
INSERT INTO t1 (x, y) VALUES (4, 40) (3, 30);
INSERT INTO t2 (x, y) VALUES (4, 41) (2, 21) (2, 22);
INSERT INTO t2 (x, y) VALUES (0, 0) (5, 50) (4, 42);
SET join_use_nulls = 0;
-- First pass over all join variants, with join_use_nulls = 0.
-- Each SELECT of a string literal prints a section label or a '-' separator between result sets.
-- NOTE(review): the expected output listed with this test shows 0 in the right-side columns
-- of unmatched left rows for this pass -- confirm against the .reference file.

-- ANY LEFT JOIN: USING (x), then USING (x,y); the same pair of queries is issued twice.
SELECT 'any left';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x;
-- LEFT JOIN without ANY: key on x, key on y, key on both columns, then two variants
-- where the second key is an expression (on the left side, then on both sides).
SELECT 'all left';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y;
-- ANY INNER JOIN: same USING (x) / USING (x,y) pair, issued twice.
SELECT 'any inner';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x;
-- INNER JOIN without ANY: the same five key combinations as the 'all left' section.
SELECT 'all inner';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y;
SET join_use_nulls = 1;
-- Second pass: the same queries in the same order, now with join_use_nulls = 1.
-- NOTE(review): the expected output listed with this test shows \N instead of 0 in the
-- right-side columns of unmatched left rows for this pass -- confirm against the .reference file.

-- ANY LEFT JOIN: USING (x), then USING (x,y); the same pair of queries is issued twice.
SELECT 'any left';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x;
-- LEFT JOIN without ANY: key on x, key on y, key on both columns, then two variants
-- where the second key is an expression (on the left side, then on both sides).
SELECT 'all left';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y;
-- ANY INNER JOIN: same USING (x) / USING (x,y) pair, issued twice.
SELECT 'any inner';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x;
SELECT '-';
SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x;
-- INNER JOIN without ANY: the same five key combinations as the 'all left' section.
SELECT 'all inner';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y;
SELECT '-';
SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y;
-- Teardown: remove the three tables created at the top of the script.
DROP TABLE t0;
DROP TABLE t1;
DROP TABLE t2;