diff --git a/dbms/src/Interpreters/MergeJoin.cpp b/dbms/src/Interpreters/MergeJoin.cpp index bd7e7cfe078..2464500957b 100644 --- a/dbms/src/Interpreters/MergeJoin.cpp +++ b/dbms/src/Interpreters/MergeJoin.cpp @@ -476,10 +476,14 @@ void MergeJoin::leftJoin(MergeJoinCursor & left_cursor, const Block & left_block while (!left_cursor.atEnd() && !right_cursor.atEnd()) { - size_t left_position = left_cursor.position(); /// save inequal position + /// Non-zero left_key_tail means there was equality for the last left key in the previous leftJoin() call. + /// Do not join it twice: join only if it's equal to the first right key of the current leftJoin() call and skip otherwise. + size_t left_unequal_position = left_cursor.position() + left_key_tail; + left_key_tail = 0; + Range range = left_cursor.getNextEqualRange(right_cursor); - joinInequalsLeft(left_block, left_columns, right_columns, left_position, range.left_start, is_all); + joinInequalsLeft(left_block, left_columns, right_columns, left_unequal_position, range.left_start, is_all); if (range.empty()) break; diff --git a/dbms/tests/queries/0_stateless/01010_pmj_one_row_blocks.reference b/dbms/tests/queries/0_stateless/01010_pmj_one_row_blocks.reference new file mode 100644 index 00000000000..c5ef57bb882 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01010_pmj_one_row_blocks.reference @@ -0,0 +1,188 @@ +any left +0 0 0 +1 10 0 +2 20 2 +3 30 0 +4 40 4 +- +0 0 0 +1 10 0 +2 20 0 +3 30 0 +4 40 0 +- +0 0 0 +1 10 0 +2 20 2 +3 30 0 +4 40 4 +- +0 0 0 +1 10 0 +2 20 0 +3 30 0 +4 40 0 +all left +0 0 0 0 +1 10 0 0 +2 20 2 21 +2 20 2 22 +3 30 0 0 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +1 10 0 0 +2 20 0 0 +3 30 0 0 +4 40 0 0 +- +0 0 0 0 +1 10 0 0 +2 20 0 0 +3 30 0 0 +4 40 0 0 +- +0 0 0 0 +1 10 0 0 +2 20 2 21 +2 20 2 22 +3 30 0 0 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +1 10 0 0 +2 20 2 21 +2 20 2 22 +3 30 0 0 +4 40 4 41 +4 40 4 42 +any inner +0 0 0 +2 20 2 +4 40 4 +- +0 0 0 +- +0 0 0 +2 20 2 +4 40 4 +- +0 0 0 +all inner +0 0 0 0 +2 20 2 21 +2 20 2 
22 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +- +0 0 0 0 +- +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 +any left +0 0 0 +1 10 \N +2 20 2 +3 30 \N +4 40 4 +- +0 0 0 +1 10 \N +2 20 \N +3 30 \N +4 40 \N +- +0 0 0 +1 10 \N +2 20 2 +3 30 \N +4 40 4 +- +0 0 0 +1 10 \N +2 20 \N +3 30 \N +4 40 \N +all left +0 0 0 0 +1 10 \N \N +2 20 2 21 +2 20 2 22 +3 30 \N \N +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +1 10 \N \N +2 20 \N \N +3 30 \N \N +4 40 \N \N +- +0 0 0 0 +1 10 \N \N +2 20 \N \N +3 30 \N \N +4 40 \N \N +- +0 0 0 0 +1 10 \N \N +2 20 2 21 +2 20 2 22 +3 30 \N \N +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +1 10 \N \N +2 20 2 21 +2 20 2 22 +3 30 \N \N +4 40 4 41 +4 40 4 42 +any inner +0 0 0 +2 20 2 +4 40 4 +- +0 0 0 +- +0 0 0 +2 20 2 +4 40 4 +- +0 0 0 +all inner +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +- +0 0 0 0 +- +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 diff --git a/dbms/tests/queries/0_stateless/01010_pmj_one_row_blocks.sql b/dbms/tests/queries/0_stateless/01010_pmj_one_row_blocks.sql new file mode 100644 index 00000000000..59aa7d7d4d7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01010_pmj_one_row_blocks.sql @@ -0,0 +1,106 @@ +DROP TABLE IF EXISTS t0; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t0 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y); +CREATE TABLE t1 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y); +CREATE TABLE t2 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y); + +SET partial_merge_join = 1; +SET partial_merge_join_rows_in_right_blocks = 1; +SET any_join_distinct_right_table_keys = 1; + +INSERT INTO t1 (x, y) VALUES (0, 0); +INSERT INTO t1 (x, y) VALUES (1, 10) (2, 20); +INSERT INTO t1 (x, y) VALUES (4, 40) (3, 30); + +INSERT INTO t2 (x, y) VALUES (4, 41) (2, 21) (2, 22); +INSERT INTO t2 (x, y) VALUES (0, 0) (5, 50) (4, 42); + +SET join_use_nulls = 0; + +SELECT 'any left'; 
+SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x; + +SELECT 'all left'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y; + +SELECT 'any inner'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x; + +SELECT 'all inner'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y; + +SET join_use_nulls = 1; + +SELECT 'any left'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY 
LEFT JOIN t2 USING (x,y) ORDER BY x; + +SELECT 'all left'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y; + +SELECT 'any inner'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x; + +SELECT 'all inner'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y; + +DROP TABLE t0; +DROP TABLE t1; +DROP TABLE t2; diff --git a/dbms/tests/queries/0_stateless/01010_pmj_skip_blocks.reference b/dbms/tests/queries/0_stateless/01010_pmj_skip_blocks.reference new file mode 100644 index 00000000000..c5ef57bb882 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01010_pmj_skip_blocks.reference @@ -0,0 +1,188 @@ +any left +0 0 0 +1 10 0 +2 20 2 +3 30 0 +4 40 4 +- +0 0 0 +1 10 0 +2 20 0 +3 30 0 +4 40 0 +- +0 0 0 +1 10 0 +2 20 2 +3 30 0 +4 40 4 +- +0 0 0 +1 10 0 +2 20 0 +3 30 0 +4 40 0 +all left +0 0 0 0 +1 10 0 0 +2 20 2 21 +2 20 2 22 +3 30 0 0 +4 40 4 
41 +4 40 4 42 +- +0 0 0 0 +1 10 0 0 +2 20 0 0 +3 30 0 0 +4 40 0 0 +- +0 0 0 0 +1 10 0 0 +2 20 0 0 +3 30 0 0 +4 40 0 0 +- +0 0 0 0 +1 10 0 0 +2 20 2 21 +2 20 2 22 +3 30 0 0 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +1 10 0 0 +2 20 2 21 +2 20 2 22 +3 30 0 0 +4 40 4 41 +4 40 4 42 +any inner +0 0 0 +2 20 2 +4 40 4 +- +0 0 0 +- +0 0 0 +2 20 2 +4 40 4 +- +0 0 0 +all inner +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +- +0 0 0 0 +- +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 +any left +0 0 0 +1 10 \N +2 20 2 +3 30 \N +4 40 4 +- +0 0 0 +1 10 \N +2 20 \N +3 30 \N +4 40 \N +- +0 0 0 +1 10 \N +2 20 2 +3 30 \N +4 40 4 +- +0 0 0 +1 10 \N +2 20 \N +3 30 \N +4 40 \N +all left +0 0 0 0 +1 10 \N \N +2 20 2 21 +2 20 2 22 +3 30 \N \N +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +1 10 \N \N +2 20 \N \N +3 30 \N \N +4 40 \N \N +- +0 0 0 0 +1 10 \N \N +2 20 \N \N +3 30 \N \N +4 40 \N \N +- +0 0 0 0 +1 10 \N \N +2 20 2 21 +2 20 2 22 +3 30 \N \N +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +1 10 \N \N +2 20 2 21 +2 20 2 22 +3 30 \N \N +4 40 4 41 +4 40 4 42 +any inner +0 0 0 +2 20 2 +4 40 4 +- +0 0 0 +- +0 0 0 +2 20 2 +4 40 4 +- +0 0 0 +all inner +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +- +0 0 0 0 +- +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 +- +0 0 0 0 +2 20 2 21 +2 20 2 22 +4 40 4 41 +4 40 4 42 diff --git a/dbms/tests/queries/0_stateless/01010_pmj_skip_blocks.sql b/dbms/tests/queries/0_stateless/01010_pmj_skip_blocks.sql new file mode 100644 index 00000000000..3f2cbdd0cc4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01010_pmj_skip_blocks.sql @@ -0,0 +1,107 @@ +DROP TABLE IF EXISTS t0; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t0 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y); +CREATE TABLE t1 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y); +CREATE TABLE t2 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y); + +SET partial_merge_join = 1; +SET 
partial_merge_join_optimisations = 1; +SET partial_merge_join_rows_in_right_blocks = 2; +SET any_join_distinct_right_table_keys = 1; + +INSERT INTO t1 (x, y) VALUES (0, 0); +INSERT INTO t1 (x, y) VALUES (1, 10) (2, 20); +INSERT INTO t1 (x, y) VALUES (4, 40) (3, 30); + +INSERT INTO t2 (x, y) VALUES (4, 41) (2, 21) (2, 22); +INSERT INTO t2 (x, y) VALUES (0, 0) (5, 50) (4, 42); + +SET join_use_nulls = 0; + +SELECT 'any left'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x; + +SELECT 'all left'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y; + +SELECT 'any inner'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x; + +SELECT 'all inner'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y; +SELECT '-'; 
+SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y; + +SET join_use_nulls = 1; + +SELECT 'any left'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY LEFT JOIN t2 USING (x,y) ORDER BY x; + +SELECT 'all left'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 LEFT JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y; + +SELECT 'any inner'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x) ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.x FROM t1 ANY INNER JOIN t2 USING (x,y) ORDER BY x; + +SELECT 'all inner'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND t1.y = t2.y ORDER BY x; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt32(intDiv(t1.y,10)) = t2.x ORDER BY x, t2.y; +SELECT '-'; +SELECT t1.*, t2.* FROM t1 INNER JOIN t2 ON t1.x = t2.x AND toUInt64(t1.x) = intDiv(t2.y,10) ORDER BY x, t2.y; + +DROP TABLE t0; +DROP TABLE t1; +DROP TABLE t2;