From 93c58c25e0e18b915491c5b07959965d678fd248 Mon Sep 17 00:00:00 2001 From: RogerYK Date: Fri, 21 Jan 2022 18:04:59 +0800 Subject: [PATCH 001/106] Fix substring negative offset length --- src/Functions/GatherUtils/Algorithms.h | 2 +- .../00970_substring_arg_validation.reference | 2 +- .../01060_substring_negative_size.reference | 33 +++++ .../01060_substring_negative_size.sql | 114 +++++++++++++++--- 4 files changed, 134 insertions(+), 17 deletions(-) diff --git a/src/Functions/GatherUtils/Algorithms.h b/src/Functions/GatherUtils/Algorithms.h index 046e2dcf70f..e61a7f1ad3c 100644 --- a/src/Functions/GatherUtils/Algorithms.h +++ b/src/Functions/GatherUtils/Algorithms.h @@ -306,7 +306,7 @@ void NO_INLINE sliceFromRightConstantOffsetBounded(Source && src, Sink && sink, { ssize_t size = length; if (size < 0) - size += static_cast(src.getElementSize()) - offset; + size += offset; if (size > 0) writeSlice(src.getSliceFromRight(offset, size), sink); diff --git a/tests/queries/0_stateless/00970_substring_arg_validation.reference b/tests/queries/0_stateless/00970_substring_arg_validation.reference index 13e7564ea0c..8b137891791 100644 --- a/tests/queries/0_stateless/00970_substring_arg_validation.reference +++ b/tests/queries/0_stateless/00970_substring_arg_validation.reference @@ -1 +1 @@ -o + diff --git a/tests/queries/0_stateless/01060_substring_negative_size.reference b/tests/queries/0_stateless/01060_substring_negative_size.reference index b25696dc7d6..3e219d20f02 100644 --- a/tests/queries/0_stateless/01060_substring_negative_size.reference +++ b/tests/queries/0_stateless/01060_substring_negative_size.reference @@ -2,11 +2,26 @@ bcdef bcdef bcdef bcdef +bcdef +bcdef + + + +g +g +g - bcdef bcdef bcdef bcdef +bcdef + + + +g +g +g - bcdef 23456 @@ -16,6 +31,15 @@ bcdef 2345 bcdef 345 +- + + + +6 + + + + - bcdef 23456 @@ -25,3 +49,12 @@ bcdef 2345 bcdef 345 +- + + + +6 + + + + diff --git a/tests/queries/0_stateless/01060_substring_negative_size.sql b/tests/queries/0_stateless/01060_substring_negative_size.sql index 23cab14a6e0..daae2c5072b 100644 --- a/tests/queries/0_stateless/01060_substring_negative_size.sql +++ b/tests/queries/0_stateless/01060_substring_negative_size.sql @@ -2,35 +2,119 @@ select substring('abcdefgh', 2, -2); select substring('abcdefgh', materialize(2), -2); select substring('abcdefgh', 2, materialize(-2)); select substring('abcdefgh', materialize(2), materialize(-2)); +select substring(materialize('abcdefgh'), 2, -2); +select substring(materialize('abcdefgh'), materialize(2), materialize(-2)); + +select substring('abcdefgh', -2, -2); +select substring(materialize('abcdefgh'), -2, -2); +select substring(materialize('abcdefgh'), materialize(-2), materialize(-2)); + +select substring('abcdefgh', -2, -1); +select substring(materialize('abcdefgh'), -2, -1); +select substring(materialize('abcdefgh'), materialize(-2), materialize(-1)); select '-'; select substring(cast('abcdefgh' as FixedString(8)), 2, -2); select substring(cast('abcdefgh' as FixedString(8)), materialize(2), -2); select substring(cast('abcdefgh' as FixedString(8)), 2, materialize(-2)); +select substring(materialize(cast('abcdefgh' as FixedString(8))), 2, -2); select substring(cast('abcdefgh' as FixedString(8)), materialize(2), materialize(-2)); -select '-'; +select substring(cast('abcdefgh' as FixedString(8)), -2, -2); +select substring(materialize(cast('abcdefgh' as FixedString(8))), -2, -2); +select substring(materialize(cast('abcdefgh' as FixedString(8))), materialize(-2), materialize(-2)); -drop table if exists t; -create table t (s String, l Int8, r Int8) engine = Memory; -insert into t values ('abcdefgh', 2, -2), ('12345678', 3, -3); - -select substring(s, 2, -2) from t; -select substring(s, l, -2) from t; -select substring(s, 2, r) from t; -select substring(s, l, r) from t; +select substring(cast('abcdefgh' as FixedString(8)), -2, -1); +select substring(materialize(cast('abcdefgh' as FixedString(8))), -2, -1); +select substring(materialize(cast('abcdefgh' as FixedString(8))), materialize(-2), materialize(-1)); select '-'; drop table if exists t; -create table t (s FixedString(8), l Int8, r Int8) engine = Memory; -insert into t values ('abcdefgh', 2, -2), ('12345678', 3, -3); +create table t +( + s String, + l Int8, + r Int8 +) engine = Memory; +insert into t +values ('abcdefgh', 2, -2), + ('12345678', 3, -3); -select substring(s, 2, -2) from t; -select substring(s, l, -2) from t; -select substring(s, 2, r) from t; -select substring(s, l, r) from t; +select substring(s, 2, -2) +from t; +select substring(s, l, -2) +from t; +select substring(s, 2, r) +from t; +select substring(s, l, r) +from t; + +select '-'; drop table if exists t; +create table t +( + s String, + l Int8, + r Int8 +) engine = Memory; +insert into t +values ('abcdefgh', -2, -2), + ('12345678', -3, -3); +select substring(s, -2, -2) +from t; +select substring(s, l, -2) +from t; +select substring(s, -2, r) +from t; +select substring(s, l, r) +from t; + +select '-'; + +drop table if exists t; +create table t +( + s FixedString(8), + l Int8, + r Int8 +) engine = Memory; +insert into t +values ('abcdefgh', 2, -2), + ('12345678', 3, -3); + +select substring(s, 2, -2) +from t; +select substring(s, l, -2) +from t; +select substring(s, 2, r) +from t; +select substring(s, l, r) +from t; + +select '-'; + +drop table if exists t; +create table t +( + s FixedString(8), + l Int8, + r Int8 +) engine = Memory; +insert into t +values ('abcdefgh', -2, -2), + ('12345678', -3, -3); + +select substring(s, -2, -2) +from t; +select substring(s, l, -2) +from t; +select substring(s, -2, r) +from t; +select substring(s, l, r) +from t; + +drop table if exists t; From c2e8322895da11d5a16d5b9f350d2379bc7db3ae Mon Sep 17 00:00:00 2001 From: RogerYK Date: Wed, 26 Jan 2022 15:59:40 +0800 Subject: [PATCH 002/106] Update old docs --- docs/en/sql-reference/functions/array-functions.md | 7 +++++-- docs/en/sql-reference/functions/bit-functions.md | 4 ++-- docs/en/sql-reference/functions/string-functions.md | 3 ++- docs/ru/sql-reference/functions/array-functions.md | 6 ++++-- docs/zh/sql-reference/functions/array-functions.md | 6 +++--- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 8231cda4b77..3915008ac88 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -619,8 +619,11 @@ arraySlice(array, offset[, length]) **Arguments** - `array` – Array of data. -- `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1. -- `length` – The length of the required slice. If you specify a negative value, the function returns an open slice `[offset, array_length - length)`. If you omit the value, the function returns the slice `[offset, the_end_of_array]`. +- `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value + is an indent on the right. Numbering of the array items begins with 1. +- `length` – The length of the required slice. If you specify a negative value, the function returns an open + slice `[offset, array_length - length]`. If you omit the value, the function returns the + slice `[offset, the_end_of_array]`. **Example** diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 24adb362c98..ba5f98ec54f 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -135,8 +135,8 @@ bitSlice(s, offset[, length]) - `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the bits begins with 1. - `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring [ - offset, array_length - length). If you omit the value, the function returns the substring [offset, the_end_string]. - If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right. + offset, array_length - length]. If you omit the value, the function returns the substring [offset, the_end_string]. If + length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right. **Returned value** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a30cacde519..410decaad3a 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -479,7 +479,8 @@ Result: ## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) {#substring} -Returns a substring starting with the byte from the ‘offset’ index that is ‘length’ bytes long. Character indexing starts from one (as in standard SQL). The ‘offset’ and ‘length’ arguments must be constants. +Returns a substring starting with the byte from the ‘offset’ index that is ‘length’ bytes long. Character indexing +starts from one (as in standard SQL). ## substringUTF8(s, offset, length) {#substringutf8} diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 5507ca77f16..a7ebcec5c9d 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -575,8 +575,10 @@ arraySlice(array, offset[, length]) **Аргументы** - `array` – массив данных. -- `offset` – отступ от края массива. Положительное значение - отступ слева, отрицательное значение - отступ справа. Отсчет элементов массива начинается с 1. -- `length` – длина необходимого среза. Если указать отрицательное значение, то функция вернёт открытый срез `[offset, array_length - length)`. Если не указать значение, то функция вернёт срез `[offset, the_end_of_array]`. +- `offset` – отступ от края массива. Положительное значение - отступ слева, отрицательное значение - отступ справа. + Отсчет элементов массива начинается с 1. +- `length` – длина необходимого среза. Если указать отрицательное значение, то функция вернёт открытый + срез `[offset, array_length - length]`. Если не указать значение, то функция вернёт срез `[offset, the_end_of_array]`. **Пример** diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index 2a2b34da7e1..6ec6d8fe352 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -397,9 +397,9 @@ SELECT arrayPushFront(['b'], 'a') AS res **参数** -- `array` – 数组。 -- `offset` – 数组的偏移。正值表示左侧的偏移量,负值表示右侧的缩进值。数组下标从1开始。 -- `length` - 子数组的长度。如果指定负值,则该函数返回`[offset,array_length - length`。如果省略该值,则该函数返回`[offset,the_end_of_array]`。 +- `array` – 数组。 +- `offset` – 数组的偏移。正值表示左侧的偏移量,负值表示右侧的缩进值。数组下标从1开始。 +- `length` - 子数组的长度。如果指定负值,则该函数返回`[offset,array_length - length]`。如果省略该值,则该函数返回`[offset,the_end_of_array]`。 **示例** From 887eeddeb6b3290559d4f3bf49efbcc9369ef6ef Mon Sep 17 00:00:00 2001 From: RogerYK Date: Wed, 26 Jan 2022 20:30:15 +0800 Subject: [PATCH 003/106] Fix test --- ..._functions_concat_slice_push_pop.reference | 48 +++++++++---------- .../01060_substring_negative_size.sql | 14 ++---- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference index f757a86aeee..f0d66bd49eb 100644 --- a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference +++ b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference @@ -217,12 +217,12 @@ arraySlice(arr1, -4, 2) arraySlice(arr1, -4, -1) [] 2 4 [] [] 2 4 [] -[1,2,3,4,5] 2 4 [] -[1,2,3,4,5,6,7] 2 4 [4,5] -[1,2,3,4,5,6,7] 2 4 [4,5] -[1,2,3,4,5,6,7] 2 4 [4,5] -[1,2,3,4,5,6,7] 2 4 [4,5] -[1,2,3,4,5,6,7] 2 4 [4,5] +[1,2,3,4,5] 2 4 [2,3,4] +[1,2,3,4,5,6,7] 2 4 [4,5,6] +[1,2,3,4,5,6,7] 2 4 [4,5,6] +[1,2,3,4,5,6,7] 2 4 [4,5,6] +[1,2,3,4,5,6,7] 2 4 [4,5,6] +[1,2,3,4,5,6,7] 2 4 [4,5,6] arrayPushFront(arr1, 1) [] [1] [] [1] @@ -504,12 +504,12 @@ arraySlice(arr1, -4, 2) arraySlice(arr1, -4, -1) [] 2 4 [] [] 2 4 [] -[1,2,3,4,5] 2 4 [] -[1,NULL,3,4,NULL,6,7] 2 4 [4,NULL] -[1,2,3,NULL,5,6,7] 2 4 [NULL,5] -[1,2,3,4,5,NULL,7] 2 4 [4,5] -[1,2,3,4,5,6,7] 2 4 [4,5] -[1,NULL,3,NULL,5,6,7] 2 4 [NULL,5] +[1,2,3,4,5] 2 4 [2,3,4] +[1,NULL,3,4,NULL,6,7] 2 4 [4,NULL,6] +[1,2,3,NULL,5,6,7] 2 4 [NULL,5,6] +[1,2,3,4,5,NULL,7] 2 4 [4,5,NULL] +[1,2,3,4,5,6,7] 2 4 [4,5,6] +[1,NULL,3,NULL,5,6,7] 2 4 [NULL,5,6] arrayPushFront(arr1, 1) [] [1] [] [1] @@ -817,12 +817,12 @@ arraySlice(arr1, -4, 2) arraySlice(arr1, -4, -1) [] 2 4 [] [] 2 4 [] -[1,2,3,4,5] 2 4 [] -[1,NULL,3,4,NULL,6,7] 2 4 [4,NULL] -[1,2,3,NULL,5,6,7] 2 4 [NULL,5] -[1,2,3,4,5,NULL,7] 2 4 [4,5] -[1,2,3,4,5,6,7] 2 4 [4,5] -[1,NULL,3,NULL,5,6,7] 2 4 [NULL,5] +[1,2,3,4,5] 2 4 [2,3,4] +[1,NULL,3,4,NULL,6,7] 2 4 [4,NULL,6] +[1,2,3,NULL,5,6,7] 2 4 [NULL,5,6] +[1,2,3,4,5,NULL,7] 2 4 [4,5,NULL] +[1,2,3,4,5,6,7] 2 4 [4,5,6] +[1,NULL,3,NULL,5,6,7] 2 4 [NULL,5,6] arrayPushFront(arr1, 1) [] [1] [] [1] @@ -1104,12 +1104,12 @@ arraySlice(arr1, -4, 2) arraySlice(arr1, -4, -1) [] 2 4 [] [] 2 4 [] -['1','2','3','4','5'] 2 4 [] -['1',NULL,'3','4',NULL,'6','7'] 2 4 ['4',NULL] -['1','2','3',NULL,'5','6','7'] 2 4 [NULL,'5'] -['1','2','3','4','5',NULL,'7'] 2 4 ['4','5'] -['1','2','3','4','5','6','7'] 2 4 ['4','5'] -['1',NULL,'3',NULL,'5','6','7'] 2 4 [NULL,'5'] +['1','2','3','4','5'] 2 4 ['2','3','4'] +['1',NULL,'3','4',NULL,'6','7'] 2 4 ['4',NULL,'6'] +['1','2','3',NULL,'5','6','7'] 2 4 [NULL,'5','6'] +['1','2','3','4','5',NULL,'7'] 2 4 ['4','5',NULL] +['1','2','3','4','5','6','7'] 2 4 ['4','5','6'] +['1',NULL,'3',NULL,'5','6','7'] 2 4 [NULL,'5','6'] arrayPushFront(arr1, 1) [] ['1'] [] ['1'] diff --git a/tests/queries/0_stateless/01060_substring_negative_size.sql b/tests/queries/0_stateless/01060_substring_negative_size.sql index daae2c5072b..321644db2fa 100644 --- a/tests/queries/0_stateless/01060_substring_negative_size.sql +++ b/tests/queries/0_stateless/01060_substring_negative_size.sql @@ -38,9 +38,8 @@ create table t l Int8, r Int8 ) engine = Memory; -insert into t -values ('abcdefgh', 2, -2), - ('12345678', 3, -3); +insert into t(s, l, r) +values ('abcdefgh', 2, -2),('12345678', 3, -3); select substring(s, 2, -2) from t; @@ -61,8 +60,7 @@ create table t r Int8 ) engine = Memory; insert into t -values ('abcdefgh', -2, -2), - ('12345678', -3, -3); +values ('abcdefgh', -2, -2),('12345678', -3, -3); select substring(s, -2, -2) from t; @@ -83,8 +81,7 @@ create table t r Int8 ) engine = Memory; insert into t -values ('abcdefgh', 2, -2), - ('12345678', 3, -3); +values ('abcdefgh', 2, -2),('12345678', 3, -3); select substring(s, 2, -2) from t; @@ -105,8 +102,7 @@ create table t r Int8 ) engine = Memory; insert into t -values ('abcdefgh', -2, -2), - ('12345678', -3, -3); +values ('abcdefgh', -2, -2),('12345678', -3, -3); select substring(s, -2, -2) from t; From 102f6f66a72450234ba953f9c5a759353ea89221 Mon Sep 17 00:00:00 2001 From: RogerYK Date: Fri, 28 Jan 2022 11:20:34 +0800 Subject: [PATCH 004/106] Add test --- .../01060_substring_negative_size.reference | 33 ------ .../01060_substring_negative_size.sql | 110 +++--------------- ..._arraySlice_negative_offset_size.reference | 15 +++ .../02185_arraySlice_negative_offset_size.sql | 25 ++++ ...5_substring_negative_offset_size.reference | 31 +++++ .../02185_substring_negative_offset_size.sql | 48 ++++++++ 6 files changed, 134 insertions(+), 128 deletions(-) create mode 100644 tests/queries/0_stateless/02185_arraySlice_negative_offset_size.reference create mode 100644 tests/queries/0_stateless/02185_arraySlice_negative_offset_size.sql create mode 100644 tests/queries/0_stateless/02185_substring_negative_offset_size.reference create mode 100644 tests/queries/0_stateless/02185_substring_negative_offset_size.sql diff --git a/tests/queries/0_stateless/01060_substring_negative_size.reference b/tests/queries/0_stateless/01060_substring_negative_size.reference index 3e219d20f02..b25696dc7d6 100644 --- a/tests/queries/0_stateless/01060_substring_negative_size.reference +++ b/tests/queries/0_stateless/01060_substring_negative_size.reference @@ -2,26 +2,11 @@ bcdef bcdef bcdef bcdef -bcdef -bcdef - - - -g -g -g - bcdef bcdef bcdef bcdef -bcdef - - - -g -g -g - bcdef 23456 @@ -31,15 +16,6 @@ bcdef 2345 bcdef 345 -- - - - -6 - - - - - bcdef 23456 @@ -49,12 +25,3 @@ bcdef 2345 bcdef 345 -- - - - -6 - - - - diff --git a/tests/queries/0_stateless/01060_substring_negative_size.sql b/tests/queries/0_stateless/01060_substring_negative_size.sql index 321644db2fa..23cab14a6e0 100644 --- a/tests/queries/0_stateless/01060_substring_negative_size.sql +++ b/tests/queries/0_stateless/01060_substring_negative_size.sql @@ -2,115 +2,35 @@ select substring('abcdefgh', 2, -2); select substring('abcdefgh', materialize(2), -2); select substring('abcdefgh', 2, materialize(-2)); select substring('abcdefgh', materialize(2), materialize(-2)); -select substring(materialize('abcdefgh'), 2, -2); -select substring(materialize('abcdefgh'), materialize(2), materialize(-2)); - -select substring('abcdefgh', -2, -2); -select substring(materialize('abcdefgh'), -2, -2); -select substring(materialize('abcdefgh'), materialize(-2), materialize(-2)); - -select substring('abcdefgh', -2, -1); -select substring(materialize('abcdefgh'), -2, -1); -select substring(materialize('abcdefgh'), materialize(-2), materialize(-1)); select '-'; select substring(cast('abcdefgh' as FixedString(8)), 2, -2); select substring(cast('abcdefgh' as FixedString(8)), materialize(2), -2); select substring(cast('abcdefgh' as FixedString(8)), 2, materialize(-2)); -select substring(materialize(cast('abcdefgh' as FixedString(8))), 2, -2); select substring(cast('abcdefgh' as FixedString(8)), materialize(2), materialize(-2)); -select substring(cast('abcdefgh' as FixedString(8)), -2, -2); -select substring(materialize(cast('abcdefgh' as FixedString(8))), -2, -2); -select substring(materialize(cast('abcdefgh' as FixedString(8))), materialize(-2), materialize(-2)); +select '-'; -select substring(cast('abcdefgh' as FixedString(8)), -2, -1); -select substring(materialize(cast('abcdefgh' as FixedString(8))), -2, -1); -select substring(materialize(cast('abcdefgh' as FixedString(8))), materialize(-2), materialize(-1)); +drop table if exists t; +create table t (s String, l Int8, r Int8) engine = Memory; +insert into t values ('abcdefgh', 2, -2), ('12345678', 3, -3); + +select substring(s, 2, -2) from t; +select substring(s, l, -2) from t; +select substring(s, 2, r) from t; +select substring(s, l, r) from t; select '-'; drop table if exists t; -create table t -( - s String, - l Int8, - r Int8 -) engine = Memory; -insert into t(s, l, r) -values ('abcdefgh', 2, -2),('12345678', 3, -3); +create table t (s FixedString(8), l Int8, r Int8) engine = Memory; +insert into t values ('abcdefgh', 2, -2), ('12345678', 3, -3); -select substring(s, 2, -2) -from t; -select substring(s, l, -2) -from t; -select substring(s, 2, r) -from t; -select substring(s, l, r) -from t; - -select '-'; +select substring(s, 2, -2) from t; +select substring(s, l, -2) from t; +select substring(s, 2, r) from t; +select substring(s, l, r) from t; drop table if exists t; -create table t -( - s String, - l Int8, - r Int8 -) engine = Memory; -insert into t -values ('abcdefgh', -2, -2),('12345678', -3, -3); -select substring(s, -2, -2) -from t; -select substring(s, l, -2) -from t; -select substring(s, -2, r) -from t; -select substring(s, l, r) -from t; - -select '-'; - -drop table if exists t; -create table t -( - s FixedString(8), - l Int8, - r Int8 -) engine = Memory; -insert into t -values ('abcdefgh', 2, -2),('12345678', 3, -3); - -select substring(s, 2, -2) -from t; -select substring(s, l, -2) -from t; -select substring(s, 2, r) -from t; -select substring(s, l, r) -from t; - -select '-'; - -drop table if exists t; -create table t -( - s FixedString(8), - l Int8, - r Int8 -) engine = Memory; -insert into t -values ('abcdefgh', -2, -2),('12345678', -3, -3); - -select substring(s, -2, -2) -from t; -select substring(s, l, -2) -from t; -select substring(s, -2, r) -from t; -select substring(s, l, r) -from t; - -drop table if exists t; diff --git a/tests/queries/0_stateless/02185_arraySlice_negative_offset_size.reference b/tests/queries/0_stateless/02185_arraySlice_negative_offset_size.reference new file mode 100644 index 00000000000..4e3701e4f86 --- /dev/null +++ b/tests/queries/0_stateless/02185_arraySlice_negative_offset_size.reference @@ -0,0 +1,15 @@ +[] +[] +[] +[7] +[7] +[7] +- +[] +[] +[] +[6] +[] +[] +[] +[] diff --git a/tests/queries/0_stateless/02185_arraySlice_negative_offset_size.sql b/tests/queries/0_stateless/02185_arraySlice_negative_offset_size.sql new file mode 100644 index 00000000000..e7f9f3948d9 --- /dev/null +++ b/tests/queries/0_stateless/02185_arraySlice_negative_offset_size.sql @@ -0,0 +1,25 @@ +select arraySlice([1, 2, 3, 4, 5, 6, 7, 8], -2, -2); +select arraySlice(materialize([1, 2, 3, 4, 5, 6, 7, 8]), -2, -2); +select arraySlice(materialize([1, 2, 3, 4, 5, 6, 7, 8]), materialize(-2), materialize(-2)); + +select arraySlice([1, 2, 3, 4, 5, 6, 7, 8], -2, -1); +select arraySlice(materialize([1, 2, 3, 4, 5, 6, 7, 8]), -2, -1); +select arraySlice(materialize([1, 2, 3, 4, 5, 6, 7, 8]), materialize(-2), materialize(-1)); + +select '-'; +drop table if exists t; +create table t +( + s Array(Int), + l Int8, + r Int8 +) engine = Memory; + +insert into t values ([1, 2, 3, 4, 5, 6, 7, 8], -2, -2),([1, 2, 3, 4, 5, 6, 7, 8], -3, -3); + +select arraySlice(s, -2, -2) from t; +select arraySlice(s, l, -2) from t; +select arraySlice(s, -2, r) from t; +select arraySlice(s, l, r) from t; + +drop table if exists t; \ No newline at end of file diff --git a/tests/queries/0_stateless/02185_substring_negative_offset_size.reference b/tests/queries/0_stateless/02185_substring_negative_offset_size.reference new file mode 100644 index 00000000000..107f083a4d8 --- /dev/null +++ b/tests/queries/0_stateless/02185_substring_negative_offset_size.reference @@ -0,0 +1,31 @@ + + + +g +g +g +- + + + +g +g +g +- + + + +6 + + + + +- + + + +6 + + + + diff --git a/tests/queries/0_stateless/02185_substring_negative_offset_size.sql b/tests/queries/0_stateless/02185_substring_negative_offset_size.sql new file mode 100644 index 00000000000..715dff30369 --- /dev/null +++ b/tests/queries/0_stateless/02185_substring_negative_offset_size.sql @@ -0,0 +1,48 @@ +select substring('abcdefgh', -2, -2); +select substring(materialize('abcdefgh'), -2, -2); +select substring(materialize('abcdefgh'), materialize(-2), materialize(-2)); + +select substring('abcdefgh', -2, -1); +select substring(materialize('abcdefgh'), -2, -1); +select substring(materialize('abcdefgh'), materialize(-2), materialize(-1)); + +select '-'; +select substring(cast('abcdefgh' as FixedString(8)), -2, -2); +select substring(materialize(cast('abcdefgh' as FixedString(8))), -2, -2); +select substring(materialize(cast('abcdefgh' as FixedString(8))), materialize(-2), materialize(-2)); + +select substring(cast('abcdefgh' as FixedString(8)), -2, -1); +select substring(materialize(cast('abcdefgh' as FixedString(8))), -2, -1); +select substring(materialize(cast('abcdefgh' as FixedString(8))), materialize(-2), materialize(-1)); + +select '-'; +drop table if exists t; +create table t +( + s String, + l Int8, + r Int8 +) engine = Memory; + +insert into t values ('abcdefgh', -2, -2),('12345678', -3, -3); + +select substring(s, -2, -2) from t; +select substring(s, l, -2) from t; +select substring(s, -2, r) from t; +select substring(s, l, r) from t; + +select '-'; +drop table if exists t; +create table t( + s FixedString(8), + l Int8, + r Int8 +) engine = Memory; +insert into t values ('abcdefgh', -2, -2),('12345678', -3, -3); + +select substring(s, -2, -2) from t; +select substring(s, l, -2) from t; +select substring(s, -2, r) from t; +select substring(s, l, r) from t; + +drop table if exists t; From 7f69507c957afca316a6b903c0ff950eb16228ba Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 1 Mar 2022 17:22:12 +0800 Subject: [PATCH 005/106] finish dev --- programs/client/Client.cpp | 154 +++++++++++++++++++++++++++++++++ programs/client/Client.h | 7 ++ programs/local/LocalServer.cpp | 9 ++ programs/local/LocalServer.h | 2 + src/Client/ClientBase.cpp | 151 -------------------------------- src/Client/ClientBase.h | 13 +-- 6 files changed, 179 insertions(+), 157 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index c40f41cd8d1..f7da4f5a8a0 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,7 @@ #endif namespace fs = std::filesystem; +using namespace std::literals; namespace DB @@ -1222,6 +1224,158 @@ void Client::processConfig() client_info.quota_key = config().getString("quota_key", ""); } +void Client::readArguments( + int argc, + char ** argv, + Arguments & common_arguments, + std::vector & external_tables_arguments, + std::vector & hosts_and_ports_arguments) +{ + /** We allow different groups of arguments: + * - common arguments; + * - arguments for any number of external tables each in form "--external args...", + * where possible args are file, name, format, structure, types; + * - param arguments for prepared statements. + * Split these groups before processing. + */ + + bool in_external_group = false; + + std::string prev_host_arg; + std::string prev_port_arg; + + for (int arg_num = 1; arg_num < argc; ++arg_num) + { + const char * arg = argv[arg_num]; + + if (arg == "--external"sv) + { + in_external_group = true; + external_tables_arguments.emplace_back(Arguments{""}); + } + /// Options with value after equal sign. + else if (in_external_group + && (0 == strncmp(arg, "--file=", strlen("--file=")) || 0 == strncmp(arg, "--name=", strlen("--name=")) + || 0 == strncmp(arg, "--format=", strlen("--format=")) || 0 == strncmp(arg, "--structure=", strlen("--structure=")) + || 0 == strncmp(arg, "--types=", strlen("--types=")))) + { + external_tables_arguments.back().emplace_back(arg); + } + /// Options with value after whitespace. + else if (in_external_group + && (arg == "--file"sv || arg == "--name"sv || arg == "--format"sv + || arg == "--structure"sv || arg == "--types"sv)) + { + if (arg_num + 1 < argc) + { + external_tables_arguments.back().emplace_back(arg); + ++arg_num; + arg = argv[arg_num]; + external_tables_arguments.back().emplace_back(arg); + } + else + break; + } + else + { + in_external_group = false; + if (arg == "--file"sv || arg == "--name"sv || arg == "--format"sv || arg == "--structure"sv || arg == "--types"sv) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter must be in external group, try add --external before {}", arg); + + /// Parameter arg after underline. + if (startsWith(arg, "--param_")) + { + const char * param_continuation = arg + strlen("--param_"); + const char * equal_pos = strchr(param_continuation, '='); + + if (equal_pos == param_continuation) + throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS); + + if (equal_pos) + { + /// param_name=value + query_parameters.emplace(String(param_continuation, equal_pos), String(equal_pos + 1)); + } + else + { + /// param_name value + ++arg_num; + if (arg_num >= argc) + throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS); + arg = argv[arg_num]; + query_parameters.emplace(String(param_continuation), String(arg)); + } + } + else if (startsWith(arg, "--host") || startsWith(arg, "-h")) + { + std::string host_arg; + /// --host host + if (arg == "--host"sv || arg == "-h"sv) + { + ++arg_num; + if (arg_num >= argc) + throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS); + arg = argv[arg_num]; + host_arg = "--host="; + host_arg.append(arg); + } + else + host_arg = arg; + + /// --port port1 --host host1 + if (!prev_port_arg.empty()) + { + hosts_and_ports_arguments.push_back({host_arg, prev_port_arg}); + prev_port_arg.clear(); + } + else + { + /// --host host1 --host host2 + if (!prev_host_arg.empty()) + hosts_and_ports_arguments.push_back({prev_host_arg}); + + prev_host_arg = host_arg; + } + } + else if (startsWith(arg, "--port")) + { + std::string port_arg = arg; + /// --port port + if (arg == "--port"sv) + { + port_arg.push_back('='); + ++arg_num; + if (arg_num >= argc) + throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS); + arg = argv[arg_num]; + port_arg.append(arg); + } + + /// --host host1 --port port1 + if (!prev_host_arg.empty()) + { + hosts_and_ports_arguments.push_back({port_arg, prev_host_arg}); + prev_host_arg.clear(); + } + else + { + /// --port port1 --port port2 + if (!prev_port_arg.empty()) + hosts_and_ports_arguments.push_back({prev_port_arg}); + + prev_port_arg = port_arg; + } + } + else + common_arguments.emplace_back(arg); + } + } + if (!prev_host_arg.empty()) + hosts_and_ports_arguments.push_back({prev_host_arg}); + if (!prev_port_arg.empty()) + hosts_and_ports_arguments.push_back({prev_port_arg}); +} + } diff --git a/programs/client/Client.h b/programs/client/Client.h index 45bdd077351..f1e1c2669c7 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -32,6 +32,13 @@ protected: const std::vector & hosts_and_ports_arguments) override; void processConfig() override; + void readArguments( + int argc, + char ** argv, + Arguments & common_arguments, + std::vector & external_tables_arguments, + std::vector & hosts_and_ports_arguments) override; + private: void printChangedSettings() const; std::vector loadWarningMessages(); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index eb3a03d0564..08f37f6e29b 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -799,6 +799,15 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp config().setString("logger.level", options["logger.level"].as()); } +void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector &, std::vector &) +{ + for (int arg_num = 1; arg_num < argc; ++arg_num) + { + const char * arg = argv[arg_num]; + common_arguments.emplace_back(arg); + } +} + } diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index cc186e343c1..fc09d53b37f 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -43,6 +43,8 @@ protected: void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector &, const std::vector &) override; void processConfig() override; + void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector &, std::vector &) override; + private: /** Composes CREATE subquery based on passed arguments (--structure --file --table and --input-format) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7dfa60ad560..c5d5d89defd 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -1731,156 +1730,6 @@ void ClientBase::showClientVersion() } -void ClientBase::readArguments( - int argc, - char ** argv, - Arguments & common_arguments, - std::vector & external_tables_arguments, - std::vector & hosts_and_ports_arguments) -{ - /** We allow different groups of arguments: - * - common arguments; - * - arguments for any number of external tables each in form "--external args...", - * where possible args are file, name, format, structure, types; - * - param arguments for prepared statements. - * Split these groups before processing. - */ - - bool in_external_group = false; - - std::string prev_host_arg; - std::string prev_port_arg; - - for (int arg_num = 1; arg_num < argc; ++arg_num) - { - const char * arg = argv[arg_num]; - - if (arg == "--external"sv) - { - in_external_group = true; - external_tables_arguments.emplace_back(Arguments{""}); - } - /// Options with value after equal sign. - else if (in_external_group - && (0 == strncmp(arg, "--file=", strlen("--file=")) || 0 == strncmp(arg, "--name=", strlen("--name=")) - || 0 == strncmp(arg, "--format=", strlen("--format=")) || 0 == strncmp(arg, "--structure=", strlen("--structure=")) - || 0 == strncmp(arg, "--types=", strlen("--types=")))) - { - external_tables_arguments.back().emplace_back(arg); - } - /// Options with value after whitespace. - else if (in_external_group - && (arg == "--file"sv || arg == "--name"sv || arg == "--format"sv - || arg == "--structure"sv || arg == "--types"sv)) - { - if (arg_num + 1 < argc) - { - external_tables_arguments.back().emplace_back(arg); - ++arg_num; - arg = argv[arg_num]; - external_tables_arguments.back().emplace_back(arg); - } - else - break; - } - else - { - in_external_group = false; - - /// Parameter arg after underline. - if (startsWith(arg, "--param_")) - { - const char * param_continuation = arg + strlen("--param_"); - const char * equal_pos = strchr(param_continuation, '='); - - if (equal_pos == param_continuation) - throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS); - - if (equal_pos) - { - /// param_name=value - query_parameters.emplace(String(param_continuation, equal_pos), String(equal_pos + 1)); - } - else - { - /// param_name value - ++arg_num; - if (arg_num >= argc) - throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS); - arg = argv[arg_num]; - query_parameters.emplace(String(param_continuation), String(arg)); - } - } - else if (startsWith(arg, "--host") || startsWith(arg, "-h")) - { - std::string host_arg; - /// --host host - if (arg == "--host"sv || arg == "-h"sv) - { - ++arg_num; - if (arg_num >= argc) - throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS); - arg = argv[arg_num]; - host_arg = "--host="; - host_arg.append(arg); - } - else - host_arg = arg; - - /// --port port1 --host host1 - if (!prev_port_arg.empty()) - { - hosts_and_ports_arguments.push_back({host_arg, prev_port_arg}); - prev_port_arg.clear(); - } - else - { - /// --host host1 --host host2 - if (!prev_host_arg.empty()) - hosts_and_ports_arguments.push_back({prev_host_arg}); - - prev_host_arg = host_arg; - } - } - else if (startsWith(arg, "--port")) - { - std::string port_arg = arg; - /// --port port - if (arg == "--port"sv) - { - port_arg.push_back('='); - ++arg_num; - if (arg_num >= argc) - throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS); - arg = argv[arg_num]; - port_arg.append(arg); - } - - /// --host host1 --port port1 - if (!prev_host_arg.empty()) - { - hosts_and_ports_arguments.push_back({port_arg, prev_host_arg}); - prev_host_arg.clear(); - } - else - { - /// --port port1 --port port2 - if (!prev_port_arg.empty()) - hosts_and_ports_arguments.push_back({prev_port_arg}); - - prev_port_arg = port_arg; - } - } - else - common_arguments.emplace_back(arg); - } - } - if (!prev_host_arg.empty()) - hosts_and_ports_arguments.push_back({prev_host_arg}); - if (!prev_port_arg.empty()) - hosts_and_ports_arguments.push_back({prev_port_arg}); -} - void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) { cmd_settings.addProgramOptions(options_description.main_description.value()); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index a92888868a4..faa2ec84f3b 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -105,6 +105,13 @@ protected: protected: bool processQueryText(const String & text); + virtual void readArguments( + int argc, + char ** argv, + Arguments & common_arguments, + std::vector & external_tables_arguments, + std::vector & hosts_and_ports_arguments) = 0; + private: void receiveResult(ASTPtr parsed_query); @@ -136,12 +143,6 @@ private: void resetOutput(); void outputQueryInfo(bool echo_query_); - void readArguments( - int argc, - char ** argv, - Arguments & common_arguments, - std::vector & external_tables_arguments, - std::vector & hosts_and_ports_arguments); void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments); void updateSuggest(const ASTCreateQuery & ast_create); From 4cc8d7570e4bb690628f7fc257927013e3ab5891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Tue, 1 Mar 2022 19:21:46 +0800 Subject: [PATCH 006/106] Update 01342_query_parameters_alias.sh --- tests/queries/0_stateless/01342_query_parameters_alias.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01342_query_parameters_alias.sh b/tests/queries/0_stateless/01342_query_parameters_alias.sh index 11fbe37dabb..e8ccf5c297e 100755 --- a/tests/queries/0_stateless/01342_query_parameters_alias.sh +++ b/tests/queries/0_stateless/01342_query_parameters_alias.sh @@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --param_x '\N' --query 'SELECT {x:Nullable(Nothing)} as a' --format TSVWithNamesAndTypes +$CLICKHOUSE_CLIENT --param_x '\N' --query 'SELECT {x:Nullable(Nothing)} as a' --external --format TSVWithNamesAndTypes From 40f418f00f35d9e657026fd97bb2bbdd3c491baa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Tue, 1 Mar 2022 19:24:48 +0800 Subject: [PATCH 007/106] Update 01814_distributed_push_down_limit.sh --- tests/queries/0_stateless/01814_distributed_push_down_limit.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh index d995e3a1370..415f873a4b3 100755 --- a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh +++ b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh @@ -115,7 +115,7 @@ function main() trap cleanup EXIT echo 'distributed_push_down_limit=0' - test_distributed_push_down_limit_0 --format Null + test_distributed_push_down_limit_0 --external --format Null # # The following tests (tests with distributed_push_down_limit=1) requires From 7a182f13908116f916435f886f51b00467f62509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Tue, 1 Mar 2022 19:25:35 +0800 Subject: [PATCH 008/106] Update 02044_url_glob_parallel.sh --- tests/queries/0_stateless/02044_url_glob_parallel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02044_url_glob_parallel.sh b/tests/queries/0_stateless/02044_url_glob_parallel.sh index c9c779a9ddb..386c8e06a69 100755 --- a/tests/queries/0_stateless/02044_url_glob_parallel.sh +++ b/tests/queries/0_stateless/02044_url_glob_parallel.sh @@ -10,6 +10,6 @@ i=0 retries=60 # Sometimes five seconds are not enough due to system overload. # But if it can run in less than five seconds at least sometimes - it is enough for the test. while [[ $i -lt $retries ]]; do - timeout 5s ${CLICKHOUSE_CLIENT} --max_threads 10 --query "SELECT * FROM url('http://127.0.0.{1..10}:${CLICKHOUSE_PORT_HTTP}/?query=SELECT+sleep(1)', TSV, 'x UInt8')" --format Null && break + timeout 5s ${CLICKHOUSE_CLIENT} --max_threads 10 --query "SELECT * FROM url('http://127.0.0.{1..10}:${CLICKHOUSE_PORT_HTTP}/?query=SELECT+sleep(1)', TSV, 'x UInt8')" --external --format Null && break ((++i)) done From f2ce7cbc8fec074f805710e4bc839a05bfbf73bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Tue, 1 Mar 2022 19:29:59 +0800 Subject: [PATCH 009/106] Update Client.cpp --- programs/client/Client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index f7da4f5a8a0..9f3e3684a78 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1279,7 +1279,7 @@ void Client::readArguments( else { in_external_group = false; - if (arg == "--file"sv || arg == "--name"sv || arg == "--format"sv || arg == "--structure"sv || arg == "--types"sv) + if (arg == "--file"sv || arg == "--name"sv || arg == "--structure"sv || arg == "--types"sv) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter must be in external group, try add --external before {}", arg); /// Parameter arg after underline. From e2aabde2885bb43753e78b18211b35e167167727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Tue, 1 Mar 2022 19:30:24 +0800 Subject: [PATCH 010/106] Update 01342_query_parameters_alias.sh --- tests/queries/0_stateless/01342_query_parameters_alias.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01342_query_parameters_alias.sh b/tests/queries/0_stateless/01342_query_parameters_alias.sh index e8ccf5c297e..11fbe37dabb 100755 --- a/tests/queries/0_stateless/01342_query_parameters_alias.sh +++ b/tests/queries/0_stateless/01342_query_parameters_alias.sh @@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --param_x '\N' --query 'SELECT {x:Nullable(Nothing)} as a' --external --format TSVWithNamesAndTypes +$CLICKHOUSE_CLIENT --param_x '\N' --query 'SELECT {x:Nullable(Nothing)} as a' --format TSVWithNamesAndTypes From fafd7c6e37fc3639736111d701883c8a300c3ba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Tue, 1 Mar 2022 19:31:12 +0800 Subject: [PATCH 011/106] Update 01814_distributed_push_down_limit.sh --- tests/queries/0_stateless/01814_distributed_push_down_limit.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh index 415f873a4b3..d995e3a1370 100755 --- a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh +++ b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh @@ -115,7 +115,7 @@ function main() trap cleanup EXIT echo 'distributed_push_down_limit=0' - test_distributed_push_down_limit_0 --external --format Null + test_distributed_push_down_limit_0 --format Null # # The following tests (tests with distributed_push_down_limit=1) requires From ce99d7f2cd1893cbfba85447d61df9f3946f26fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Tue, 1 Mar 2022 19:31:34 +0800 Subject: [PATCH 012/106] Update 02044_url_glob_parallel.sh --- tests/queries/0_stateless/02044_url_glob_parallel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02044_url_glob_parallel.sh b/tests/queries/0_stateless/02044_url_glob_parallel.sh index 386c8e06a69..c9c779a9ddb 100755 --- a/tests/queries/0_stateless/02044_url_glob_parallel.sh +++ b/tests/queries/0_stateless/02044_url_glob_parallel.sh @@ -10,6 +10,6 @@ i=0 retries=60 # Sometimes five seconds are not enough due to system overload. # But if it can run in less than five seconds at least sometimes - it is enough for the test. while [[ $i -lt $retries ]]; do - timeout 5s ${CLICKHOUSE_CLIENT} --max_threads 10 --query "SELECT * FROM url('http://127.0.0.{1..10}:${CLICKHOUSE_PORT_HTTP}/?query=SELECT+sleep(1)', TSV, 'x UInt8')" --external --format Null && break + timeout 5s ${CLICKHOUSE_CLIENT} --max_threads 10 --query "SELECT * FROM url('http://127.0.0.{1..10}:${CLICKHOUSE_PORT_HTTP}/?query=SELECT+sleep(1)', TSV, 'x UInt8')" --format Null && break ((++i)) done From 630182b2b12a2105c52085413a83bfcf82623800 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 14 Mar 2022 14:42:09 +0000 Subject: [PATCH 013/106] minor renames --- .../MergeTree/FutureMergedMutatedPart.cpp | 2 +- .../MergeTree/FutureMergedMutatedPart.h | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 26 ++++++++-------- src/Storages/MergeTree/MergeAlgorithm.cpp | 26 ---------------- src/Storages/MergeTree/MergeAlgorithm.h | 2 -- .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTreeBaseSelectProcessor.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 30 +++++++++---------- .../MergeTree/MergeTreeDataMergerMutator.cpp | 4 +-- .../MergeTree/MergeTreeDataPartCompact.cpp | 4 +-- .../MergeTree/MergeTreeDataPartInMemory.cpp | 4 +-- .../MergeTree/MergeTreeDataPartType.cpp | 24 ++++----------- .../MergeTree/MergeTreeDataPartType.h | 10 +++---- .../MergeTree/MergeTreeDataPartWide.cpp | 4 +-- .../MergeTree/MergeTreeDataWriter.cpp | 10 +++---- .../MergeTreeIndexGranularityInfo.cpp | 18 +++++------ .../MergeTree/MergeTreeWriteAheadLog.cpp | 2 +- src/Storages/MergeTree/MergeType.cpp | 27 +++-------------- src/Storages/MergeTree/MergeType.h | 8 ++--- src/Storages/MergeTree/MutateTask.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 6 ++-- .../MergeTree/ReplicatedMergeTreeLogEntry.h | 2 +- src/Storages/MergeTree/checkDataPart.cpp | 6 ++-- 23 files changed, 81 insertions(+), 142 deletions(-) delete mode 100644 src/Storages/MergeTree/MergeAlgorithm.cpp diff --git a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp index acbdcdcf38a..019b24f6916 100644 --- a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp +++ b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp @@ -16,7 +16,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_) size_t sum_rows = 0; size_t sum_bytes_uncompressed = 0; - MergeTreeDataPartType future_part_type = MergeTreeDataPartType::UNKNOWN; + MergeTreeDataPartType future_part_type = MergeTreeDataPartType::Unknown; for (const auto & part : parts_) { sum_rows += part->rows_count; diff --git a/src/Storages/MergeTree/FutureMergedMutatedPart.h b/src/Storages/MergeTree/FutureMergedMutatedPart.h index 9ff39b44c8c..4447687c3d9 100644 --- a/src/Storages/MergeTree/FutureMergedMutatedPart.h +++ b/src/Storages/MergeTree/FutureMergedMutatedPart.h @@ -22,7 +22,7 @@ struct FutureMergedMutatedPart MergeTreeDataPartType type; MergeTreePartInfo part_info; MergeTreeData::DataPartsVector parts; - MergeType merge_type = MergeType::REGULAR; + MergeType merge_type = MergeType::Regular; const MergeTreePartition & getPartition() const { return parts.front()->partition; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 9028023dc80..c9d6b03d2e7 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -251,16 +251,16 @@ static void incrementTypeMetric(MergeTreeDataPartType type) { switch (type.getValue()) { - case MergeTreeDataPartType::WIDE: + case MergeTreeDataPartType::Wide: CurrentMetrics::add(CurrentMetrics::PartsWide); return; - case MergeTreeDataPartType::COMPACT: + case MergeTreeDataPartType::Compact: CurrentMetrics::add(CurrentMetrics::PartsCompact); return; - case MergeTreeDataPartType::IN_MEMORY: + case MergeTreeDataPartType::InMemory: CurrentMetrics::add(CurrentMetrics::PartsInMemory); return; - case MergeTreeDataPartType::UNKNOWN: + case MergeTreeDataPartType::Unknown: return; } } @@ -269,16 +269,16 @@ static void decrementTypeMetric(MergeTreeDataPartType type) { switch (type.getValue()) { - case MergeTreeDataPartType::WIDE: + case MergeTreeDataPartType::Wide: CurrentMetrics::sub(CurrentMetrics::PartsWide); return; - case MergeTreeDataPartType::COMPACT: + case MergeTreeDataPartType::Compact: CurrentMetrics::sub(CurrentMetrics::PartsCompact); return; - case MergeTreeDataPartType::IN_MEMORY: + case MergeTreeDataPartType::InMemory: CurrentMetrics::sub(CurrentMetrics::PartsInMemory); return; - case MergeTreeDataPartType::UNKNOWN: + case MergeTreeDataPartType::Unknown: return; } } @@ -903,7 +903,7 @@ void IMergeTreeDataPart::loadRowsCount() { rows_count = 0; } - else if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::COMPACT || parent_part) + else if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::Compact || parent_part) { if (!volume->getDisk()->exists(path)) throw Exception("No count.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); @@ -1054,7 +1054,7 @@ void IMergeTreeDataPart::loadColumns(bool require) if (!volume->getDisk()->exists(path)) { /// We can get list of columns only from columns.txt in compact parts. - if (require || part_type == Type::COMPACT) + if (require || part_type == Type::Compact) throw Exception("No columns.txt in part " + name + ", expected path " + path + " on drive " + volume->getDisk()->getName(), ErrorCodes::NO_FILE_IN_DATA_PART); @@ -1663,17 +1663,17 @@ String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const bool isCompactPart(const MergeTreeDataPartPtr & data_part) { - return (data_part && data_part->getType() == MergeTreeDataPartType::COMPACT); + return (data_part && data_part->getType() == MergeTreeDataPartType::Compact); } bool isWidePart(const MergeTreeDataPartPtr & data_part) { - return (data_part && data_part->getType() == MergeTreeDataPartType::WIDE); + return (data_part && data_part->getType() == MergeTreeDataPartType::Wide); } bool isInMemoryPart(const MergeTreeDataPartPtr & data_part) { - return (data_part && data_part->getType() == MergeTreeDataPartType::IN_MEMORY); + return (data_part && data_part->getType() == MergeTreeDataPartType::InMemory); } } diff --git a/src/Storages/MergeTree/MergeAlgorithm.cpp b/src/Storages/MergeTree/MergeAlgorithm.cpp deleted file mode 100644 index 9f73557e701..00000000000 --- a/src/Storages/MergeTree/MergeAlgorithm.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -String toString(MergeAlgorithm merge_algorithm) -{ - switch (merge_algorithm) - { - case MergeAlgorithm::Undecided: - return "Undecided"; - case MergeAlgorithm::Horizontal: - return "Horizontal"; - case MergeAlgorithm::Vertical: - return "Vertical"; - } - - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeAlgorithm {}", static_cast(merge_algorithm)); -} - -} diff --git a/src/Storages/MergeTree/MergeAlgorithm.h b/src/Storages/MergeTree/MergeAlgorithm.h index 5f52b5be0fd..9123182b71e 100644 --- a/src/Storages/MergeTree/MergeAlgorithm.h +++ b/src/Storages/MergeTree/MergeAlgorithm.h @@ -12,6 +12,4 @@ enum class MergeAlgorithm Vertical /// per-row merge of PK and secondary indices columns, per-column gather for non-PK columns }; -String toString(MergeAlgorithm merge_algorithm); - } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index bb4ea22a1ac..3c0ecb1a487 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -33,7 +33,7 @@ std::pair MergeFromLogEntryT return {false, {}}; } - if (entry.merge_type == MergeType::TTL_RECOMPRESS && + if (entry.merge_type == MergeType::TTLRecompress && (time(nullptr) - entry.create_time) <= storage_settings_ptr->try_fetch_recompressed_part_timeout.totalSeconds() && entry.source_replica != storage.replica_name) { diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 5b69a4e68b6..79b0064fc16 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -658,7 +658,7 @@ size_t MergeTreeBaseSelectProcessor::estimateMaxBatchSizeForHugeRanges() size_t sum_average_marks_size = 0; /// getColumnSize is not fully implemented for compact parts - if (task->data_part->getType() == IMergeTreeDataPart::Type::COMPACT) + if (task->data_part->getType() == IMergeTreeDataPart::Type::Compact) { sum_average_marks_size = average_granule_size_bytes; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 7f407199e81..6cce65aa53e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2233,27 +2233,27 @@ MergeTreeDataPartType MergeTreeData::choosePartType(size_t bytes_uncompressed, s { const auto settings = getSettings(); if (!canUsePolymorphicParts(*settings)) - return MergeTreeDataPartType::WIDE; + return MergeTreeDataPartType::Wide; if (bytes_uncompressed < settings->min_bytes_for_compact_part || rows_count < settings->min_rows_for_compact_part) - return MergeTreeDataPartType::IN_MEMORY; + return MergeTreeDataPartType::InMemory; if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part) - return MergeTreeDataPartType::COMPACT; + return MergeTreeDataPartType::Compact; - return MergeTreeDataPartType::WIDE; + return MergeTreeDataPartType::Wide; } MergeTreeDataPartType MergeTreeData::choosePartTypeOnDisk(size_t bytes_uncompressed, size_t rows_count) const { const auto settings = getSettings(); if (!canUsePolymorphicParts(*settings)) - return MergeTreeDataPartType::WIDE; + return MergeTreeDataPartType::Wide; if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part) - return MergeTreeDataPartType::COMPACT; + return MergeTreeDataPartType::Compact; - return MergeTreeDataPartType::WIDE; + return MergeTreeDataPartType::Wide; } @@ -2261,11 +2261,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name, MergeTreeDataPartType type, const MergeTreePartInfo & part_info, const VolumePtr & volume, const String & relative_path, const IMergeTreeDataPart * parent_part) const { - if (type == MergeTreeDataPartType::COMPACT) + if (type == MergeTreeDataPartType::Compact) return std::make_shared(*this, name, part_info, volume, relative_path, parent_part); - else if (type == MergeTreeDataPartType::WIDE) + else if (type == MergeTreeDataPartType::Wide) return std::make_shared(*this, name, part_info, volume, relative_path, parent_part); - else if (type == MergeTreeDataPartType::IN_MEMORY) + else if (type == MergeTreeDataPartType::InMemory) return std::make_shared(*this, name, part_info, volume, relative_path, parent_part); else throw Exception("Unknown type of part " + relative_path, ErrorCodes::UNKNOWN_PART_TYPE); @@ -2274,11 +2274,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name, static MergeTreeDataPartType getPartTypeFromMarkExtension(const String & mrk_ext) { if (mrk_ext == getNonAdaptiveMrkExtension()) - return MergeTreeDataPartType::WIDE; - if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::WIDE)) - return MergeTreeDataPartType::WIDE; - if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::COMPACT)) - return MergeTreeDataPartType::COMPACT; + return MergeTreeDataPartType::Wide; + if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Wide)) + return MergeTreeDataPartType::Wide; + if (mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Compact)) + return MergeTreeDataPartType::Compact; throw Exception("Can't determine part type, because of unknown mark extension " + mrk_ext, ErrorCodes::UNKNOWN_PART_TYPE); } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index a6cda0016a8..1c59baa9f10 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -248,7 +248,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( parts_to_merge = delete_ttl_selector.select(parts_ranges, max_total_size_to_merge); if (!parts_to_merge.empty()) { - future_part->merge_type = MergeType::TTL_DELETE; + future_part->merge_type = MergeType::TTLDelete; } else if (metadata_snapshot->hasAnyRecompressionTTL()) { @@ -260,7 +260,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( parts_to_merge = recompress_ttl_selector.select(parts_ranges, max_total_size_to_merge); if (!parts_to_merge.empty()) - future_part->merge_type = MergeType::TTL_RECOMPRESS; + future_part->merge_type = MergeType::TTLRecompress; } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index f4da730b1f0..d4336bd50df 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -22,7 +22,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact( const VolumePtr & volume_, const std::optional & relative_path_, const IMergeTreeDataPart * parent_part_) - : IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::COMPACT, parent_part_) + : IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::Compact, parent_part_) { } @@ -33,7 +33,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact( const VolumePtr & volume_, const std::optional & relative_path_, const IMergeTreeDataPart * parent_part_) - : IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::COMPACT, parent_part_) + : IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::Compact, parent_part_) { } diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index e4a174a7d29..9f3b5e864da 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -23,7 +23,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( const VolumePtr & volume_, const std::optional & relative_path_, const IMergeTreeDataPart * parent_part_) - : IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::IN_MEMORY, parent_part_) + : IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::InMemory, parent_part_) { default_codec = CompressionCodecFactory::instance().get("NONE", {}); } @@ -35,7 +35,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( const VolumePtr & volume_, const std::optional & relative_path_, const IMergeTreeDataPart * parent_part_) - : IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::IN_MEMORY, parent_part_) + : IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::InMemory, parent_part_) { default_codec = CompressionCodecFactory::instance().get("NONE", {}); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartType.cpp b/src/Storages/MergeTree/MergeTreeDataPartType.cpp index d08f485d214..59cea62121b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartType.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartType.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB @@ -11,29 +12,16 @@ namespace ErrorCodes void MergeTreeDataPartType::fromString(const String & str) { - if (str == "Wide") - value = WIDE; - else if (str == "Compact") - value = COMPACT; - else if (str == "InMemory") - value = IN_MEMORY; - else + auto maybe_value = magic_enum::enum_cast(str); + if (!maybe_value || *maybe_value == Value::Unknown) throw DB::Exception("Unexpected string for part type: " + str, ErrorCodes::UNKNOWN_PART_TYPE); + + value = *maybe_value; } String MergeTreeDataPartType::toString() const { - switch (value) - { - case WIDE: - return "Wide"; - case COMPACT: - return "Compact"; - case IN_MEMORY: - return "InMemory"; - default: - return "Unknown"; - } + return String(magic_enum::enum_name(value)); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartType.h b/src/Storages/MergeTree/MergeTreeDataPartType.h index fecd9d00cdc..21381dea796 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartType.h +++ b/src/Storages/MergeTree/MergeTreeDataPartType.h @@ -13,18 +13,18 @@ public: { /// Data of each column is stored in one or several (for complex types) files. /// Every data file is followed by marks file. - WIDE, + Wide, /// Data of all columns is stored in one file. Marks are also stored in single file. - COMPACT, + Compact, /// Format with buffering data in RAM. - IN_MEMORY, + InMemory, - UNKNOWN, + Unknown, }; - MergeTreeDataPartType() : value(UNKNOWN) {} + MergeTreeDataPartType() : value(Unknown) {} MergeTreeDataPartType(Value value_) : value(value_) {} bool operator==(const MergeTreeDataPartType & other) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index b279c1aba6a..e20815c5d36 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -24,7 +24,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide( const VolumePtr & volume_, const std::optional & relative_path_, const IMergeTreeDataPart * parent_part_) - : IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::WIDE, parent_part_) + : IMergeTreeDataPart(storage_, name_, volume_, relative_path_, Type::Wide, parent_part_) { } @@ -35,7 +35,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide( const VolumePtr & volume_, const std::optional & relative_path_, const IMergeTreeDataPart * parent_part_) - : IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::WIDE, parent_part_) + : IMergeTreeDataPart(storage_, name_, info_, volume_, relative_path_, Type::Wide, parent_part_) { } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index d16b5274a45..a8ed1ba1e9f 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -566,9 +566,9 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPart( { String part_name = projection.name; MergeTreeDataPartType part_type; - if (parent_part->getType() == MergeTreeDataPartType::IN_MEMORY) + if (parent_part->getType() == MergeTreeDataPartType::InMemory) { - part_type = MergeTreeDataPartType::IN_MEMORY; + part_type = MergeTreeDataPartType::InMemory; } else { @@ -603,9 +603,9 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempProjectionPart( { String part_name = fmt::format("{}_{}", projection.name, block_num); MergeTreeDataPartType part_type; - if (parent_part->getType() == MergeTreeDataPartType::IN_MEMORY) + if (parent_part->getType() == MergeTreeDataPartType::InMemory) { - part_type = MergeTreeDataPartType::IN_MEMORY; + part_type = MergeTreeDataPartType::InMemory; } else { @@ -637,7 +637,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeInMemoryProjectionP { return writeProjectionPartImpl( projection.name, - MergeTreeDataPartType::IN_MEMORY, + MergeTreeDataPartType::InMemory, projection.name + ".proj" /* relative_path */, false /* is_temp */, parent_part, diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index 6da0a822f7f..4f4a99f1b01 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -21,8 +21,8 @@ std::optional MergeTreeIndexGranularityInfo::getMarksExtensionFromF { const auto & ext = fs::path(it->path()).extension(); if (ext == getNonAdaptiveMrkExtension() - || ext == getAdaptiveMrkExtension(MergeTreeDataPartType::WIDE) - || ext == getAdaptiveMrkExtension(MergeTreeDataPartType::COMPACT)) + || ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Wide) + || ext == getAdaptiveMrkExtension(MergeTreeDataPartType::Compact)) return ext; } } @@ -38,7 +38,7 @@ MergeTreeIndexGranularityInfo::MergeTreeIndexGranularityInfo(const MergeTreeData /// Granularity is fixed if (!storage.canUseAdaptiveGranularity()) { - if (type != MergeTreeDataPartType::WIDE) + if (type != MergeTreeDataPartType::Wide) throw Exception("Only Wide parts can be used with non-adaptive granularity.", ErrorCodes::NOT_IMPLEMENTED); setNonAdaptive(); } @@ -69,11 +69,11 @@ void MergeTreeIndexGranularityInfo::setNonAdaptive() size_t MergeTreeIndexGranularityInfo::getMarkSizeInBytes(size_t columns_num) const { - if (type == MergeTreeDataPartType::WIDE) + if (type == MergeTreeDataPartType::Wide) return is_adaptive ? getAdaptiveMrkSizeWide() : getNonAdaptiveMrkSizeWide(); - else if (type == MergeTreeDataPartType::COMPACT) + else if (type == MergeTreeDataPartType::Compact) return getAdaptiveMrkSizeCompact(columns_num); - else if (type == MergeTreeDataPartType::IN_MEMORY) + else if (type == MergeTreeDataPartType::InMemory) return 0; else throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE); @@ -87,11 +87,11 @@ size_t getAdaptiveMrkSizeCompact(size_t columns_num) std::string getAdaptiveMrkExtension(MergeTreeDataPartType part_type) { - if (part_type == MergeTreeDataPartType::WIDE) + if (part_type == MergeTreeDataPartType::Wide) return ".mrk2"; - else if (part_type == MergeTreeDataPartType::COMPACT) + else if (part_type == MergeTreeDataPartType::Compact) return ".mrk3"; - else if (part_type == MergeTreeDataPartType::IN_MEMORY) + else if (part_type == MergeTreeDataPartType::InMemory) return ""; else throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE); diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index d7cddfe9c14..6228bc844c7 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -155,7 +155,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor part = storage.createPart( part_name, - MergeTreeDataPartType::IN_MEMORY, + MergeTreeDataPartType::InMemory, MergeTreePartInfo::fromPartName(part_name, storage.format_version), single_disk_volume, part_name); diff --git a/src/Storages/MergeTree/MergeType.cpp b/src/Storages/MergeTree/MergeType.cpp index e622eb33e31..4b03f5ab57c 100644 --- a/src/Storages/MergeTree/MergeType.cpp +++ b/src/Storages/MergeTree/MergeType.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB @@ -11,35 +12,15 @@ namespace ErrorCodes MergeType checkAndGetMergeType(UInt64 merge_type) { - if (merge_type == static_cast(MergeType::REGULAR)) - return MergeType::REGULAR; - else if (merge_type == static_cast(MergeType::TTL_DELETE)) - return MergeType::TTL_DELETE; - else if (merge_type == static_cast(MergeType::TTL_RECOMPRESS)) - return MergeType::TTL_RECOMPRESS; - - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast(merge_type)); -} - -String toString(MergeType merge_type) -{ - switch (merge_type) - { - case MergeType::REGULAR: - return "REGULAR"; - case MergeType::TTL_DELETE: - return "TTL_DELETE"; - case MergeType::TTL_RECOMPRESS: - return "TTL_RECOMPRESS"; - - } + if (auto maybe_merge_type = magic_enum::enum_cast(merge_type)) + return *maybe_merge_type; throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast(merge_type)); } bool isTTLMergeType(MergeType merge_type) { - return merge_type == MergeType::TTL_DELETE || merge_type == MergeType::TTL_RECOMPRESS; + return merge_type == MergeType::TTLDelete || merge_type == MergeType::TTLRecompress; } } diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h index 53d55dc68a7..fad1ba33e3e 100644 --- a/src/Storages/MergeTree/MergeType.h +++ b/src/Storages/MergeTree/MergeType.h @@ -14,18 +14,16 @@ namespace DB enum class MergeType { /// Just regular merge - REGULAR = 1, + Regular = 1, /// Merge assigned to delete some data from parts (with TTLMergeSelector) - TTL_DELETE = 2, + TTLDelete = 2, /// Merge with recompression - TTL_RECOMPRESS = 3, + TTLRecompress = 3, }; /// Check parsed merge_type from raw int and get enum value. MergeType checkAndGetMergeType(UInt64 merge_type); -String toString(MergeType merge_type); - /// Check this merge assigned with TTL bool isTTLMergeType(MergeType merge_type); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index c001b319c99..dbcee9e5d80 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -775,7 +775,7 @@ void PartMergerWriter::prepare() // build in-memory projection because we don't support merging into a new in-memory part. // Otherwise we split the materialization into multiple stages similar to the process of // INSERT SELECT query. - if (ctx->new_data_part->getType() == MergeTreeDataPartType::IN_MEMORY) + if (ctx->new_data_part->getType() == MergeTreeDataPartType::InMemory) projection_squashes.emplace_back(0, 0); else projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index 89515b863ca..5da67c5ad8c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -79,7 +79,7 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const out << "into\n" << new_part_name; out << "\ndeduplicate: " << deduplicate; - if (merge_type != MergeType::REGULAR) + if (merge_type != MergeType::Regular) out <<"\nmerge_type: " << static_cast(merge_type); if (new_part_uuid != UUIDHelpers::Nil) @@ -165,7 +165,7 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const out << '\n'; - if (new_part_type != MergeTreeDataPartType::WIDE && new_part_type != MergeTreeDataPartType::UNKNOWN) + if (new_part_type != MergeTreeDataPartType::Wide && new_part_type != MergeTreeDataPartType::Unknown) out << "part_type: " << new_part_type.toString() << "\n"; if (quorum) @@ -348,7 +348,7 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) in >> "\n"; } else - new_part_type = MergeTreeDataPartType::WIDE; + new_part_type = MergeTreeDataPartType::Wide; /// Optional field. if (!in.eof()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 4d8f319c94a..473974958ca 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -96,7 +96,7 @@ struct ReplicatedMergeTreeLogEntryData Strings source_parts; bool deduplicate = false; /// Do deduplicate on merge Strings deduplicate_by_columns = {}; // Which columns should be checked for duplicates, empty means 'all' (default). - MergeType merge_type = MergeType::REGULAR; + MergeType merge_type = MergeType::Regular; String column_name; String index_name; diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 075e9e9fbc8..e7148898a70 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -135,7 +135,7 @@ IMergeTreeDataPart::Checksums checkDataPart( IMergeTreeDataPart::Checksums projection_checksums_data; const auto & projection_path = file_path; - if (part_type == MergeTreeDataPartType::COMPACT) + if (part_type == MergeTreeDataPartType::Compact) { auto proj_path = file_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION; auto file_buf = disk->readFile(proj_path); @@ -209,7 +209,7 @@ IMergeTreeDataPart::Checksums checkDataPart( bool check_uncompressed = true; /// First calculate checksums for columns data - if (part_type == MergeTreeDataPartType::COMPACT) + if (part_type == MergeTreeDataPartType::Compact) { const auto & file_name = MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION; checksum_file(path + file_name, file_name); @@ -217,7 +217,7 @@ IMergeTreeDataPart::Checksums checkDataPart( /// We check only checksum of compressed file. check_uncompressed = false; } - else if (part_type == MergeTreeDataPartType::WIDE) + else if (part_type == MergeTreeDataPartType::Wide) { for (const auto & column : columns_list) { From b84904b8a6930c392fae6b1525a030ef42e8e418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Wed, 23 Mar 2022 22:01:29 +0800 Subject: [PATCH 014/106] Update Client.cpp --- programs/client/Client.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 4e045724466..6d445f777df 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1047,7 +1047,6 @@ void Client::readArguments( * - param arguments for prepared statements. * Split these groups before processing. */ - bool in_external_group = false; std::string prev_host_arg; From 3d7338581b35667413246165e4a09ae34e89add0 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 19 Apr 2022 16:07:30 +0800 Subject: [PATCH 015/106] Improve join now adding joined blocks from right table can be run parallelly, speedup the join process --- src/Core/Settings.h | 1 + src/Interpreters/ConcurrentHashJoin.cpp | 325 ++++++++++++++++++ src/Interpreters/ConcurrentHashJoin.h | 105 ++++++ src/Interpreters/ExpressionAnalyzer.cpp | 8 + src/Interpreters/IJoin.h | 3 + src/Interpreters/InterpreterSelectQuery.cpp | 3 +- src/Interpreters/TableJoin.cpp | 14 + src/Interpreters/TableJoin.h | 2 + src/Processors/QueryPlan/JoinStep.cpp | 6 +- src/Processors/QueryPlan/JoinStep.h | 4 +- src/QueryPipeline/QueryPipelineBuilder.cpp | 42 ++- src/QueryPipeline/QueryPipelineBuilder.h | 3 +- .../1_stateful/00172_parallel_join.reference | 299 ++++++++++++++++ .../1_stateful/00172_parallel_join.sql | 204 +++++++++++ 14 files changed, 1006 insertions(+), 13 deletions(-) create mode 100644 src/Interpreters/ConcurrentHashJoin.cpp create mode 100644 src/Interpreters/ConcurrentHashJoin.h create mode 100644 tests/queries/1_stateful/00172_parallel_join.reference create mode 100644 tests/queries/1_stateful/00172_parallel_join.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index aa78456702c..2093333375e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -351,6 +351,7 @@ class IColumn; M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \ M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \ M(UInt64, join_on_disk_max_files_to_merge, 64, "For MergeJoin on disk set how much files it's allowed to sort simultaneously. Then this value bigger then more memory used and then less disk I/O needed. Minimum is 2.", 0) \ + M(Bool, enable_parallel_join, false, "Enable paralle join algorithm.", 0)\ M(String, temporary_files_codec, "LZ4", "Set compression codec for temporary files (sort and join on disk). I.e. LZ4, NONE.", 0) \ \ M(UInt64, max_rows_to_transfer, 0, "Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.", 0) \ diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp new file mode 100644 index 00000000000..b397aec41e5 --- /dev/null +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -0,0 +1,325 @@ +#include +#include +#include +#include +#include "Columns/FilterDescription.h" +#include "Columns/IColumn.h" +#include "Core/ColumnsWithTypeAndName.h" +#include "Core/NamesAndTypes.h" +#include "IO/WriteBufferFromString.h" +#include "Interpreters/ActionsDAG.h" +#include "Interpreters/ActionsVisitor.h" +#include "Interpreters/ExpressionActions.h" +#include "Interpreters/PreparedSets.h" +#include "Interpreters/SubqueryForSet.h" +#include "Parsers/DumpASTNode.h" +#include "Parsers/ExpressionListParsers.h" +#include "Parsers/IAST_fwd.h" +#include "Parsers/parseQuery.h" +#include +#include +#include "Common/Exception.h" +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int SET_SIZE_LIMIT_EXCEEDED; + extern const int BAD_ARGUMENTS; +} +namespace JoinStuff +{ +ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_) + : context(context_) + , table_join(table_join_) + , slots(slots_) +{ + if (!slots_) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid argument slot : {}", slots_); + } + + for (size_t i = 0; i < slots; ++i) + { + auto inner_hash_join = std::make_shared(); + inner_hash_join->data = std::make_unique(table_join_, right_sample_block, any_take_last_row_); + hash_joins.emplace_back(std::move(inner_hash_join)); + } + dispatch_datas.emplace_back(std::make_shared()); + dispatch_datas.emplace_back(std::make_shared()); +} + +bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) +{ + auto & dispatch_data = getBlockDispatchControlData(block, RIGHT); + std::vector dispatched_blocks; + Block cloned_block = block; + dispatchBlock(dispatch_data, cloned_block, dispatched_blocks); + for (size_t i = 0; i < dispatched_blocks.size(); ++i) + { + auto & hash_join = hash_joins[i]; + auto & dispatched_block = dispatched_blocks[i]; + std::unique_lock lock(hash_join->mutex); + hash_join->rows += dispatched_block.rows(); + check_total_rows += dispatched_block.rows(); + check_total_bytes += dispatched_block.bytes(); + // Don't take the real insertion here, because inserting a block into HashTable is a time-consuming operation, + // it may cause serious lock contention and make the whole process slow. + hash_join->pending_right_blocks.emplace_back(std::move(dispatched_block)); + } + + if (check_limits) + return table_join->sizeLimits().check( + check_total_rows.load(), check_total_bytes.load(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); + return true; +} + +void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr & not_processed) +{ + + if (block.rows()) + waitAllAddJoinedBlocksFinished(); + else + { + std::unique_lock lock(hash_joins[0]->mutex); + hash_joins[0]->data->joinBlock(block, not_processed); + return; + } + + auto & dispatch_data = getBlockDispatchControlData(block, LEFT); + std::vector dispatched_blocks; + Block cloned_block = block; + dispatchBlock(dispatch_data, cloned_block, dispatched_blocks); + for (size_t i = 0; i < dispatched_blocks.size(); ++i) + { + std::shared_ptr none_extra_block; + auto & hash_join = hash_joins[i]; + auto & dispatched_block = dispatched_blocks[i]; + hash_join->data->joinBlock(dispatched_block, none_extra_block); + if (none_extra_block && !none_extra_block->empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "not_processed should be empty"); + } + + ColumnsWithTypeAndName final_columns; + MutableColumns mutable_final_columns; + NamesAndTypesList names_and_types = dispatched_blocks[0].getNamesAndTypesList(); + auto types = names_and_types.getTypes(); + for (auto & dispatched_block : dispatched_blocks) + { + for (size_t pos = 0; pos < dispatched_block.columns(); ++pos) + { + auto & from_column = dispatched_block.getByPosition(pos); + if (mutable_final_columns.size() <= pos) + { + mutable_final_columns.emplace_back(from_column.column->cloneEmpty()); + } + if (!from_column.column->empty()) + { + mutable_final_columns[pos]->insertRangeFrom(*from_column.column, 0, from_column.column->size()); + } + } + } + + size_t i = 0; + for (auto & name_and_type : names_and_types) + { + ColumnPtr col_ptr = std::move(mutable_final_columns[i]); + mutable_final_columns[i] = nullptr; + ColumnWithTypeAndName col(col_ptr, name_and_type.type, name_and_type.name); + final_columns.emplace_back(col); + i += 1; + } + block = Block(final_columns); +} + +void ConcurrentHashJoin::checkTypesOfKeys(const Block & block) const +{ + hash_joins[0]->data->checkTypesOfKeys(block); +} + +void ConcurrentHashJoin::setTotals(const Block & block) +{ + if (block) + { + std::lock_guard lock(totals_mutex); + totals = block; + } +} + +const Block & ConcurrentHashJoin::getTotals() const +{ + return totals; +} + +size_t ConcurrentHashJoin::getTotalRowCount() const +{ + size_t res = 0; + for (const auto & hash_join : hash_joins) + { + res += hash_join->data->getTotalRowCount(); + } + return res; +} + +size_t ConcurrentHashJoin::getTotalByteCount() const +{ + size_t res = 0; + for (const auto & hash_join : hash_joins) + { + res += hash_join->data->getTotalByteCount(); + } + return res; +} + +bool ConcurrentHashJoin::alwaysReturnsEmptySet() const +{ + for (const auto & hash_join : hash_joins) + { + if (!hash_join->data->alwaysReturnsEmptySet() || !hash_join->pending_right_blocks.empty()) + return false; + } + return true; +} + +std::shared_ptr ConcurrentHashJoin::getNonJoinedBlocks( + const Block & /*left_sample_block*/, const Block & /*result_sample_block*/, UInt64 /*max_block_size*/) const +{ + if (table_join->strictness() == ASTTableJoin::Strictness::Asof || + table_join->strictness() == ASTTableJoin::Strictness::Semi || + !isRightOrFull(table_join->kind())) + { + return {}; + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", table_join->kind(), table_join->strictness()); +} + +std::shared_ptr ConcurrentHashJoin::buildHashExpressionAction(const Block & block, const Strings & based_columns_names, Strings & hash_columns_names) +{ + WriteBufferFromOwnString col_buf; + for (size_t i = 0, sz = based_columns_names.size(); i < sz; ++i) + { + if (i) + col_buf << ","; + col_buf << based_columns_names[i]; + } + WriteBufferFromOwnString write_buf; + for (size_t i = 0; i < slots; ++i) + { + if (i) + write_buf << ","; + write_buf << "cityHash64(" << col_buf.str() << ")%" << slots << "=" << i; + } + auto settings = context->getSettings(); + ParserExpressionList hash_expr_parser(true); + ASTPtr func_ast = parseQuery(hash_expr_parser, write_buf.str(), "Parse Block hash expression", settings.max_query_size, settings.max_parser_depth); + for (auto & child : func_ast->children) + hash_columns_names.emplace_back(child->getColumnName()); + + DebugASTLog visit_log; + const auto & names_and_types = block.getNamesAndTypesList(); + ActionsDAGPtr actions = std::make_shared(names_and_types); + PreparedSets prepared_sets; + SubqueriesForSets subqueries_for_sets; + ActionsVisitor::Data visitor_data( + context, + SizeLimits{settings.max_rows_in_set, settings.max_bytes_in_set, settings.set_overflow_mode}, + 10, + names_and_types, + std::move(actions), + prepared_sets, + subqueries_for_sets, + true, false, true, false); + ActionsVisitor(visitor_data, visit_log.stream()).visit(func_ast); + actions = visitor_data.getActions(); + return std::make_shared(actions); +} + +ConcurrentHashJoin::BlockDispatchControlData & ConcurrentHashJoin::getBlockDispatchControlData(const Block & block, TableIndex table_index) +{ + auto & data = *dispatch_datas[table_index]; + if (data.has_init)[[likely]] + return data; + std::lock_guard lock(data.mutex); + if (data.has_init) + return data; + + if (table_join->getClauses().empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "empty join clauses"); + const auto & onexpr = table_join->getClauses()[0]; + if (table_index == LEFT) + { + data.hash_expression_actions = buildHashExpressionAction(block, onexpr.key_names_left, data.hash_columns_names); + } + else + { + data.hash_expression_actions = buildHashExpressionAction(block, onexpr.key_names_right, data.hash_columns_names); + } + data.header = block.cloneEmpty(); + data.has_init = true; + return data; +} + +void ConcurrentHashJoin::dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, std::vector & dispatched_blocks) +{ + auto rows_before_filtration = from_block.rows(); + dispatch_data.hash_expression_actions->execute(from_block, rows_before_filtration); + for (const auto & filter_column_name : dispatch_data.hash_columns_names) + { + auto full_column = from_block.findByName(filter_column_name)->column->convertToFullColumnIfConst(); + auto filter_desc = std::make_unique(*full_column); + auto num_filtered_rows = filter_desc->countBytesInFilter(); + ColumnsWithTypeAndName filtered_block_columns; + for (size_t i = 0; i < dispatch_data.header.columns(); ++i) + { + auto & from_column = from_block.getByPosition(i); + auto filtered_column = filter_desc->filter(*from_column.column, num_filtered_rows); + filtered_block_columns.emplace_back(filtered_column, from_column.type, from_column.name); + } + dispatched_blocks.emplace_back(std::move(filtered_block_columns)); + } +} + +void ConcurrentHashJoin::waitAllAddJoinedBlocksFinished() +{ + while (finished_add_joined_blocks_tasks < hash_joins.size())[[unlikely]] + { + std::shared_ptr hash_join; + { + std::unique_lock lock(finished_add_joined_blocks_tasks_mutex); + hash_join = getUnfinishedAddJoinedBlockTaks(); + if (!hash_join) + { + while (finished_add_joined_blocks_tasks < hash_joins.size()) + { + finished_add_joined_blocks_tasks_cond.wait(lock); + } + return; + } + } + std::unique_lock lock(hash_join->mutex); + while (!hash_join->pending_right_blocks.empty()) + { + Block & block = hash_join->pending_right_blocks.front(); + hash_join->data->addJoinedBlock(block, true); + hash_join->pending_right_blocks.pop_front(); + } + finished_add_joined_blocks_tasks += 1; + finished_add_joined_blocks_tasks_cond.notify_all(); + } +} + +std::shared_ptr ConcurrentHashJoin::getUnfinishedAddJoinedBlockTaks() +{ + for (auto & hash_join : hash_joins) + { + if (!hash_join->in_inserting) + { + hash_join->in_inserting = true; + return hash_join; + } + } + return nullptr; +} + +} +} diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h new file mode 100644 index 00000000000..a93a1ab0ab5 --- /dev/null +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -0,0 +1,105 @@ +#pragma once + +#include +#include +#include +#include +#include "Core/BackgroundSchedulePool.h" +#include "Functions/FunctionsLogical.h" +#include "Interpreters/ExpressionActions.h" +#include "Interpreters/IJoin.h" +#include "base/defines.h" +#include "base/types.h" +#include +#include +namespace DB +{ +namespace JoinStuff +{ +/** + * Can run addJoinedBlock() parallelly to speedup the join process. On test, it almose linear speedup by + * the degree of parallelism. + * + * The default HashJoin is not thread safe for inserting right table's rows and run it in a single thread. When + * the right table is large, the join process is too slow. + * + * We create multiple HashJoin instances here. In addJoinedBlock(), one input block is split into multiple blocks + * corresponding to the HashJoin instances by hashing every row on the join keys. And make a guarantee that every HashJoin + * instance is written by only one thread. + * + * When come to the left table matching, the blocks from left table are alse split into different HashJoin instances. + * + */ +class ConcurrentHashJoin : public IJoin +{ +public: + explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_ = false); + ~ConcurrentHashJoin() override = default; + + const TableJoin & getTableJoin() const override { return *table_join; } + bool addJoinedBlock(const Block & block, bool check_limits) override; + void checkTypesOfKeys(const Block & block) const override; + void joinBlock(Block & block, std::shared_ptr & not_processed) override; + void setTotals(const Block & block) override; + const Block & getTotals() const override; + size_t getTotalRowCount() const override; + size_t getTotalByteCount() const override; + bool alwaysReturnsEmptySet() const override; + bool supportParallelJoin() const override { return true; } + std::shared_ptr + getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override; +private: + struct InnerHashJoin + { + std::mutex mutex; + std::unique_ptr data; + std::list pending_right_blocks; + std::atomic in_inserting = false; + std::atomic rows = 0; + + }; + ContextPtr context; + std::shared_ptr table_join; + size_t slots; + std::vector> hash_joins; + std::atomic check_total_rows; + std::atomic check_total_bytes; + + std::mutex finished_add_joined_blocks_tasks_mutex; + std::condition_variable finished_add_joined_blocks_tasks_cond; + std::atomic finished_add_joined_blocks_tasks = 0; + + mutable std::mutex totals_mutex; + Block totals; + + enum TableIndex + { + LEFT = 0, + RIGHT = 1 + }; + + struct BlockDispatchControlData + { + std::mutex mutex; + std::atomic has_init = false; + std::shared_ptr hash_expression_actions; + Strings hash_columns_names; + Block header; + BlockDispatchControlData() = default; + }; + + std::vector> dispatch_datas; + + Poco::Logger * logger = &Poco::Logger::get("ConcurrentHashJoin"); + + std::shared_ptr buildHashExpressionAction(const Block & block, const Strings & based_columns_names, Strings & hash_columns_names); + BlockDispatchControlData & getBlockDispatchControlData(const Block & block, TableIndex table_index); + + static void dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, std::vector & dispatched_blocks); + + void waitAllAddJoinedBlocksFinished(); + std::shared_ptr getUnfinishedAddJoinedBlockTaks(); + +}; +} +} diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 5e5931713e0..ce1376c41e4 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -16,6 +17,7 @@ #include #include +#include #include #include #include @@ -933,7 +935,13 @@ static std::shared_ptr chooseJoinAlgorithm(std::shared_ptr ana bool allow_merge_join = analyzed_join->allowMergeJoin(); if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join)) + { + if (analyzed_join->allowConcurrentHashJoin()) + { + return std::make_shared(context, analyzed_join, context->getSettings().max_threads, sample_block); + } return std::make_shared(analyzed_join, sample_block); + } else if (analyzed_join->forceMergeJoin() || (analyzed_join->preferMergeJoin() && allow_merge_join)) return std::make_shared(analyzed_join, sample_block); return std::make_shared(analyzed_join, sample_block); diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index 2a3171adccd..64b576d3b96 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -45,6 +45,9 @@ public: /// Different query plan is used for such joins. virtual bool isFilled() const { return false; } + // That can run FillingRightJoinSideTransform parallelly + virtual bool supportParallelJoin() const { return false; } + virtual std::shared_ptr getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const = 0; }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index e981e6f0648..3d6505a6ebf 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1269,7 +1269,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

getCurrentDataStream(), expressions.join, - settings.max_block_size); + settings.max_block_size, + max_streams); join_step->setStepDescription("JOIN"); std::vector plans; diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index ec5358cf6bc..1e6e7f56eed 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -3,6 +3,7 @@ #include #include #include +#include "Parsers/ASTTablesInSelectQuery.h" #include #include @@ -106,6 +107,7 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_) , partial_merge_join_left_table_buffer_bytes(settings.partial_merge_join_left_table_buffer_bytes) , max_files_to_merge(settings.join_on_disk_max_files_to_merge) , temporary_files_codec(settings.temporary_files_codec) + , enable_parallel_join(settings.enable_parallel_join) , tmp_volume(tmp_volume_) { } @@ -748,4 +750,16 @@ void TableJoin::resetToCross() this->table_join.kind = ASTTableJoin::Kind::Cross; } +bool TableJoin::allowConcurrentHashJoin() const +{ + if (!enable_parallel_join) + return false; + if (dictionary_reader || join_algorithm != JoinAlgorithm::HASH) + return false; + if (table_join.kind != ASTTableJoin::Kind::Left && table_join.kind != ASTTableJoin::Kind::Inner) + return false; + + return true; +} + } diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index f7c03ac6e1a..c7c59f69029 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -112,6 +112,7 @@ private: const size_t partial_merge_join_left_table_buffer_bytes = 0; const size_t max_files_to_merge = 0; const String temporary_files_codec = "LZ4"; + const bool enable_parallel_join = false; /// the limit has no technical reasons, it supposed to improve safety const size_t MAX_DISJUNCTS = 16; /// NOLINT @@ -191,6 +192,7 @@ public: bool allowMergeJoin() const; bool preferMergeJoin() const { return join_algorithm == JoinAlgorithm::PREFER_PARTIAL_MERGE; } bool forceMergeJoin() const { return join_algorithm == JoinAlgorithm::PARTIAL_MERGE; } + bool allowConcurrentHashJoin() const; bool forceHashJoin() const { /// HashJoin always used for DictJoin diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 494a2a6aa0e..0170c356459 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -15,9 +15,11 @@ JoinStep::JoinStep( const DataStream & left_stream_, const DataStream & right_stream_, JoinPtr join_, - size_t max_block_size_) + size_t max_block_size_, + size_t max_streams_) : join(std::move(join_)) , max_block_size(max_block_size_) + , max_streams(max_streams_) { input_streams = {left_stream_, right_stream_}; output_stream = DataStream @@ -31,7 +33,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines if (pipelines.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expect two input steps"); - return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors); + return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors, max_streams); } void JoinStep::describePipeline(FormatSettings & settings) const diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index 71537f29a8e..0ae1f78594b 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -16,7 +16,8 @@ public: const DataStream & left_stream_, const DataStream & right_stream_, JoinPtr join_, - size_t max_block_size_); + size_t max_block_size_, + size_t max_streams_ = 0); String getName() const override { return "Join"; } @@ -29,6 +30,7 @@ public: private: JoinPtr join; size_t max_block_size; + size_t max_streams; Processors processors; }; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 9f392b51cf0..91a6346d90a 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -298,7 +299,8 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( std::unique_ptr right, JoinPtr join, size_t max_block_size, - Processors * collected_processors) + Processors * collected_processors, + size_t max_streams) { left->checkInitializedAndNotCompleted(); right->checkInitializedAndNotCompleted(); @@ -336,15 +338,39 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( /// ╞> FillingJoin ─> Resize ╣ ╞> Joining ─> (totals) /// (totals) ─────────┘ ╙─────┘ - size_t num_streams = left->getNumStreams(); - right->resize(1); + size_t num_streams = left->getNumStreams() < max_streams ? max_streams : left->getNumStreams(); + left->resize(num_streams); + num_streams = left->getNumStreams(); - auto adding_joined = std::make_shared(right->getHeader(), join); - InputPort * totals_port = nullptr; - if (right->hasTotals()) - totals_port = adding_joined->addTotalsPort(); + if (join->supportParallelJoin() && !right->hasTotals()) + { + right->resize(num_streams); + auto concurrent_right_filling_transform = [&](OutputPortRawPtrs outports) + { + Processors processors; + for (auto & outport : outports) + { + auto adding_joined = std::make_shared(right->getHeader(), join); + connect(*outport, adding_joined->getInputs().front()); + processors.emplace_back(adding_joined); + } + return processors; + }; + right->transform(concurrent_right_filling_transform); + right->resize(1); + } + else + { + LOG_TRACE(&Poco::Logger::get("QueryPipelineBuilder"), "run in single thread on right loading"); + right->resize(1); - right->addTransform(std::move(adding_joined), totals_port, nullptr); + auto adding_joined = std::make_shared(right->getHeader(), join); + InputPort * totals_port = nullptr; + if (right->hasTotals()) + totals_port = adding_joined->addTotalsPort(); + + right->addTransform(std::move(adding_joined), totals_port, nullptr); + } size_t num_streams_including_totals = num_streams + (left->hasTotals() ? 1 : 0); right->resize(num_streams_including_totals); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index ac84191cf34..5f483b86c1c 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -101,7 +101,8 @@ public: std::unique_ptr right, JoinPtr join, size_t max_block_size, - Processors * collected_processors = nullptr); + Processors * collected_processors = nullptr, + size_t max_streams = 0); /// Add other pipeline and execute it before current one. /// Pipeline must have empty header, it should not generate any chunk. diff --git a/tests/queries/1_stateful/00172_parallel_join.reference b/tests/queries/1_stateful/00172_parallel_join.reference new file mode 100644 index 00000000000..81e6cb3efd8 --- /dev/null +++ b/tests/queries/1_stateful/00172_parallel_join.reference @@ -0,0 +1,299 @@ +2014-03-17 1406958 265108 +2014-03-19 1405797 261624 +2014-03-18 1383658 258723 +2014-03-20 1353623 255328 +2014-03-21 1245779 236232 +2014-03-23 1046491 202212 +2014-03-22 1031592 197354 +2014-03-17 1406958 265108 +2014-03-19 1405797 261624 +2014-03-18 1383658 258723 +2014-03-20 1353623 255328 +2014-03-21 1245779 236232 +2014-03-23 1046491 202212 +2014-03-22 1031592 197354 + 4508153 712428 +auto.ru 576845 8935 +yandex.ru 410776 111278 +korer.ru 277987 0 +avito.ru 163820 15556 +mail.yandex.ru 152447 1046 +mail.ru 87949 22225 +best.ru 58537 55 +korablitz.ru 51844 0 +hurpass.com 49671 1251 +37292 0 35642 +92887 252214 0 +7842 196036 0 +42440 0 17837 +97811 0 13665 +99378 0 11401 +6764 0 11052 +43564 0 9522 +37645 0 9086 +76197 0 8379 +24271 0 7393 +39804 64180 0 +78332 0 6167 +82837 0 5928 +51651 0 5878 +63469 52152 0 +76593 0 4705 +54688 0 4421 +36747 44082 0 +51763 0 4309 +92887 252214 0 +7842 196036 0 +39804 64180 0 +63469 52152 0 +36747 44082 0 +10963 37562 0 +46685 28798 0 +24015 23368 0 +37615 20857 0 +62896 19719 0 +67664 19402 0 +96597 18557 0 +11045 17391 0 +25884 17302 0 +64910 17279 0 +26096 16994 0 +96134 16849 0 +5788 13974 0 +71509 13792 0 +67326 13181 0 +37292 0 35642 +42440 0 17837 +97811 0 13665 +99378 0 11401 +6764 0 11052 +43564 0 9522 +37645 0 9086 +76197 0 8379 +24271 0 7393 +78332 0 6167 +82837 0 5928 +51651 0 5878 +76593 0 4705 +54688 0 4421 +51763 0 4309 +37882 0 4094 +6236 0 3959 +28456 0 3746 +19008 0 3426 +81009 0 3158 +2961521519262 2014-03-19 00:12:45 gen_time ['gen_time'] +2961521519262 2014-03-19 00:12:45 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 2014-03-19 00:12:45 Роль пользователя ['Роль пользователя'] +2961521519262 2014-03-19 00:12:46 videoid ['videoid'] +2961521519262 2014-03-19 00:12:46 videoid Done ['videoid'] +2961521519262 2014-03-19 00:12:46 Поиск ['Поиск'] +2961521519262 2014-03-19 00:12:47 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 2014-03-19 00:12:47 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +2961521519262 2014-03-19 00:12:51 videoid 8950vvvv ['videoid'] +2961521519262 2014-03-19 00:14:11 errors SMS ['errors'] +2961521519262 2014-03-19 00:14:12 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 2014-03-19 00:15:11 videoid Done ['videoid'] +2961521519262 2014-03-19 00:15:12 videoid ['videoid'] +2961521519262 2014-03-19 00:15:12 Поиск ['Поиск'] +2961521519262 2014-03-19 00:15:12 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +2961521519262 2014-03-19 00:15:17 videoid 8950vvvv ['videoid'] +2961521519262 2014-03-19 00:19:21 gen_timestamp 564.815 ['gen_timestamp'] +2961521519262 2014-03-19 00:19:22 gen_timestamp 564.815 ['gen_timestamp'] +164599821266083 2014-03-22 10:30:50 gen_time views ['gen_time'] +164599821266083 2014-03-22 10:30:50 gen_timestamp vkontakte,face element ['gen_timestamp'] +164599821266083 2014-03-22 10:30:50 Платность Превьюшки ['Платность'] +164599821266083 2014-03-22 10:30:53 Аттачи в списке /pages/biblio ['Аттачи в списке'] +164599821266083 2014-03-22 10:31:02 Платность Превьюшки ['Платность'] +164599821266083 2014-03-22 10:31:05 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +164599821266083 2014-03-22 10:31:06 Поиск Счетчика ['Поиск'] +335386658089392 2014-03-18 04:28:17 gen_time ['gen_time'] +335386658089392 2014-03-18 04:28:18 Роль пользователя ['Роль пользователя'] +335386658089392 2014-03-18 04:28:25 Поиск ['Поиск'] +335386658089392 2014-03-18 04:28:26 videoid ['videoid'] +335386658089392 2014-03-18 04:28:26 videoid Done ['videoid'] +335386658089392 2014-03-18 04:28:26 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +335386658089392 2014-03-18 05:48:43 gen_time ['gen_time'] +335386658089392 2014-03-18 05:48:43 Роль пользователя ['Роль пользователя'] +335386658089392 2014-03-18 05:48:44 Дра ['Дра'] +419794772979101 2014-03-17 22:08:42 gen_time ['gen_time'] +419794772979101 2014-03-17 22:08:43 Роль пользователя ['Роль пользователя'] +419794772979101 2014-03-17 22:08:50 Поиск ['Поиск'] +419794772979101 2014-03-17 22:08:50 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +419794772979101 2014-03-19 14:26:49 gen_time ['gen_time'] +419794772979101 2014-03-19 14:26:49 Дра ['Дра'] +419794772979101 2014-03-19 14:26:49 Эксперимент про Счетчик есть null ['Эксперимент про'] +419794772979101 2014-03-19 14:26:52 Поиск ['Поиск'] +419794772979101 2014-03-19 14:26:52 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +818136115259082 2014-03-18 11:33:51 gen_time ['gen_time'] +818136115259082 2014-03-18 11:33:51 Дра ['Дра'] +818136115259082 2014-03-18 11:33:52 Роль пользователя ['Роль пользователя'] +818136115259082 2014-03-18 11:33:55 Поиск ['Поиск'] +818136115259082 2014-03-18 11:33:55 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +818136115259082 2014-03-18 11:34:42 gen_timestamp 564.815 ['gen_timestamp'] +818136115259082 2014-03-18 11:34:44 gen_timestamp 564.815 ['gen_timestamp'] +870023587671569 2014-03-16 22:17:44 gen_time ['gen_time'] +870023587671569 2014-03-16 22:17:44 gen_timestamp vkontakte,face element ['gen_timestamp'] +1391426411567902 2014-03-18 10:39:27 gen_time ['gen_time'] +1391426411567902 2014-03-18 10:39:27 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 2014-03-18 10:39:27 Платность model ['Платность'] +1391426411567902 2014-03-18 10:39:27 Эксперимент про Счетчик есть null ['Эксперимент про'] +1391426411567902 2014-03-18 10:39:37 videoid ['videoid'] +1391426411567902 2014-03-18 10:39:37 videoid Done ['videoid'] +1391426411567902 2014-03-18 10:39:37 Поиск ['Поиск'] +1391426411567902 2014-03-18 10:39:37 Поиск ['Поиск'] +1391426411567902 2014-03-18 10:39:37 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +1391426411567902 2014-03-18 10:39:37 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +1391426411567902 2014-03-18 10:40:07 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 2014-03-18 10:43:42 Toolbar auto 25%;\n\t\t\t\t \n \n return ['Toolbar'] +1391426411567902 2014-03-18 10:43:42 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 2014-03-18 10:43:42 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 2014-03-18 10:43:42 Права ['Права'] +1391426411567902 2014-03-18 10:43:44 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 2014-03-18 10:43:48 Toolbar Счетчик ['Toolbar'] +1391426411567902 2014-03-18 10:43:48 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 2014-03-18 10:43:48 Поиск ['Поиск'] +1391426411567902 2014-03-18 10:43:48 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +1560113638322066 2014-03-17 13:21:19 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 13:21:19 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 13:28:21 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 13:28:22 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 13:28:22 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 17:01:54 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 17:15:06 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 17:15:09 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 17:15:09 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 17:22:15 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 2014-03-17 17:22:15 gen_time ['gen_time'] +1560113638322066 2014-03-17 17:22:15 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 17:22:15 Дра ['Дра'] +1560113638322066 2014-03-17 17:22:15 Поиск ['Поиск'] +1560113638322066 2014-03-17 17:22:16 Toolbar Счетчик ['Toolbar'] +1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 2014-03-17 17:22:16 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +1560113638322066 2014-03-17 17:22:16 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +1560113638322066 2014-03-17 17:22:42 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 2014-03-17 17:22:46 errors SMS ['errors'] +2961521519262 2014-03-19 00:12:45 [] +2961521519262 2014-03-19 00:12:45 gen_time ['gen_time'] +2961521519262 2014-03-19 00:12:45 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 2014-03-19 00:12:45 Роль пользователя ['Роль пользователя'] +2961521519262 2014-03-19 00:12:46 videoid ['videoid'] +2961521519262 2014-03-19 00:12:46 videoid Done ['videoid'] +2961521519262 2014-03-19 00:12:46 Поиск ['Поиск'] +2961521519262 2014-03-19 00:12:47 [] +2961521519262 2014-03-19 00:12:47 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 2014-03-19 00:12:47 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +2961521519262 2014-03-19 00:12:51 [] +2961521519262 2014-03-19 00:12:51 videoid 8950vvvv ['videoid'] +2961521519262 2014-03-19 00:13:28 [] +2961521519262 2014-03-19 00:14:11 errors SMS ['errors'] +2961521519262 2014-03-19 00:14:12 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 2014-03-19 00:15:11 videoid Done ['videoid'] +2961521519262 2014-03-19 00:15:12 [] +2961521519262 2014-03-19 00:15:12 videoid ['videoid'] +2961521519262 2014-03-19 00:15:12 Поиск ['Поиск'] +2961521519262 2014-03-19 00:15:12 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +2961521519262 2014-03-19 00:15:17 [] +2961521519262 2014-03-19 00:15:17 videoid 8950vvvv ['videoid'] +2961521519262 2014-03-19 00:19:21 gen_timestamp 564.815 ['gen_timestamp'] +2961521519262 2014-03-19 00:19:22 gen_timestamp 564.815 ['gen_timestamp'] +164599821266083 2014-03-22 10:30:50 [] +164599821266083 2014-03-22 10:30:50 [] +164599821266083 2014-03-22 10:30:50 gen_time views ['gen_time'] +164599821266083 2014-03-22 10:30:50 gen_timestamp vkontakte,face element ['gen_timestamp'] +164599821266083 2014-03-22 10:30:50 Платность Превьюшки ['Платность'] +164599821266083 2014-03-22 10:30:53 Аттачи в списке /pages/biblio ['Аттачи в списке'] +164599821266083 2014-03-22 10:31:02 Платность Превьюшки ['Платность'] +164599821266083 2014-03-22 10:31:05 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +164599821266083 2014-03-22 10:31:06 [] +164599821266083 2014-03-22 10:31:06 Поиск Счетчика ['Поиск'] +335386658089392 2014-03-18 04:28:17 [] +335386658089392 2014-03-18 04:28:17 gen_time ['gen_time'] +335386658089392 2014-03-18 04:28:18 Роль пользователя ['Роль пользователя'] +335386658089392 2014-03-18 04:28:25 Поиск ['Поиск'] +335386658089392 2014-03-18 04:28:26 [] +335386658089392 2014-03-18 04:28:26 videoid ['videoid'] +335386658089392 2014-03-18 04:28:26 videoid Done ['videoid'] +335386658089392 2014-03-18 04:28:26 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +335386658089392 2014-03-18 04:28:31 [] +335386658089392 2014-03-18 04:28:31 [] +335386658089392 2014-03-18 05:48:43 [] +335386658089392 2014-03-18 05:48:43 gen_time ['gen_time'] +335386658089392 2014-03-18 05:48:43 Роль пользователя ['Роль пользователя'] +335386658089392 2014-03-18 05:48:44 Дра ['Дра'] +419794772979101 2014-03-17 22:08:42 [] +419794772979101 2014-03-17 22:08:42 gen_time ['gen_time'] +419794772979101 2014-03-17 22:08:43 Роль пользователя ['Роль пользователя'] +419794772979101 2014-03-17 22:08:50 Поиск ['Поиск'] +419794772979101 2014-03-17 22:08:50 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +419794772979101 2014-03-17 22:08:51 [] +419794772979101 2014-03-19 14:26:49 [] +419794772979101 2014-03-19 14:26:49 gen_time ['gen_time'] +419794772979101 2014-03-19 14:26:49 Дра ['Дра'] +419794772979101 2014-03-19 14:26:49 Эксперимент про Счетчик есть null ['Эксперимент про'] +419794772979101 2014-03-19 14:26:52 [] +419794772979101 2014-03-19 14:26:52 Поиск ['Поиск'] +419794772979101 2014-03-19 14:26:52 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +818136115259082 2014-03-18 11:33:51 [] +818136115259082 2014-03-18 11:33:51 gen_time ['gen_time'] +818136115259082 2014-03-18 11:33:51 Дра ['Дра'] +818136115259082 2014-03-18 11:33:52 Роль пользователя ['Роль пользователя'] +818136115259082 2014-03-18 11:33:55 Поиск ['Поиск'] +818136115259082 2014-03-18 11:33:55 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +818136115259082 2014-03-18 11:33:56 [] +818136115259082 2014-03-18 11:34:42 gen_timestamp 564.815 ['gen_timestamp'] +818136115259082 2014-03-18 11:34:44 [] +818136115259082 2014-03-18 11:34:44 gen_timestamp 564.815 ['gen_timestamp'] +870023587671569 2014-03-16 22:17:44 [] +870023587671569 2014-03-16 22:17:44 [] +870023587671569 2014-03-16 22:17:44 gen_time ['gen_time'] +870023587671569 2014-03-16 22:17:44 gen_timestamp vkontakte,face element ['gen_timestamp'] +870023587671569 2014-03-16 22:17:45 [] +870023587671569 2014-03-16 22:17:46 [] +1026297835113727 2014-03-17 18:41:53 [] +1026297835113727 2014-03-18 02:37:28 [] +1026297835113727 2014-03-19 18:26:53 [] +1026297835113727 2014-03-20 05:37:49 [] +1026297835113727 2014-03-20 05:49:38 [] +1026297835113727 2014-03-20 13:39:34 [] +1026297835113727 2014-03-21 07:38:39 [] +1026297835113727 2014-03-22 07:13:26 [] +1026297835113727 2014-03-22 10:50:25 [] +1391426411567902 2014-03-18 10:39:27 [] +1391426411567902 2014-03-18 10:39:27 gen_time ['gen_time'] +1391426411567902 2014-03-18 10:39:27 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 2014-03-18 10:39:27 Платность model ['Платность'] +1391426411567902 2014-03-18 10:39:27 Эксперимент про Счетчик есть null ['Эксперимент про'] +1391426411567902 2014-03-18 10:39:37 [] +1391426411567902 2014-03-18 10:39:37 videoid ['videoid'] +1391426411567902 2014-03-18 10:39:37 videoid Done ['videoid'] +1391426411567902 2014-03-18 10:39:37 Поиск ['Поиск'] +1391426411567902 2014-03-18 10:39:37 Поиск ['Поиск'] +1391426411567902 2014-03-18 10:39:37 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +4 4 2 +0 0 0 +8 8 4 +2 2 1 +6 6 3 +1 1 2 +1 1 3 +4 4 8 +4 4 9 +0 0 0 +0 0 1 +2 2 4 +2 2 5 +3 3 6 +3 3 7 diff --git a/tests/queries/1_stateful/00172_parallel_join.sql b/tests/queries/1_stateful/00172_parallel_join.sql new file mode 100644 index 00000000000..809c5d5e065 --- /dev/null +++ b/tests/queries/1_stateful/00172_parallel_join.sql @@ -0,0 +1,204 @@ +set enable_parallel_join=true; +SELECT + EventDate, + hits, + visits +FROM +( + SELECT + EventDate, + count() AS hits + FROM test.hits + GROUP BY EventDate +) ANY LEFT JOIN +( + SELECT + StartDate AS EventDate, + sum(Sign) AS visits + FROM test.visits + GROUP BY EventDate +) USING EventDate +ORDER BY hits DESC +LIMIT 10 +SETTINGS joined_subquery_requires_alias = 0; + + +SELECT + EventDate, + count() AS hits, + any(visits) +FROM test.hits ANY LEFT JOIN +( + SELECT + StartDate AS EventDate, + sum(Sign) AS visits + FROM test.visits + GROUP BY EventDate +) USING EventDate +GROUP BY EventDate +ORDER BY hits DESC +LIMIT 10 +SETTINGS joined_subquery_requires_alias = 0, enable_parallel_join=true; + + +SELECT + domain, + hits, + visits +FROM +( + SELECT + domain(URL) AS domain, + count() AS hits + FROM test.hits + GROUP BY domain +) ANY LEFT JOIN +( + SELECT + domain(StartURL) AS domain, + sum(Sign) AS visits + FROM test.visits + GROUP BY domain +) USING domain +ORDER BY hits DESC +LIMIT 10 +SETTINGS joined_subquery_requires_alias = 0; + +SELECT CounterID FROM test.visits ARRAY JOIN Goals.ID WHERE CounterID = 942285 ORDER BY CounterID; + + +SELECT + CounterID, + hits, + visits +FROM +( + SELECT + (CounterID % 100000) AS CounterID, + count() AS hits + FROM test.hits + GROUP BY CounterID +) ANY FULL OUTER JOIN +( + SELECT + (CounterID % 100000) AS CounterID, + sum(Sign) AS visits + FROM test.visits + GROUP BY CounterID + HAVING visits > 0 +) USING CounterID +WHERE hits = 0 OR visits = 0 +ORDER BY + hits + visits * 10 DESC, + CounterID ASC +LIMIT 20 +SETTINGS any_join_distinct_right_table_keys = 1, joined_subquery_requires_alias = 0; + + +SELECT + CounterID, + hits, + visits +FROM +( + SELECT + (CounterID % 100000) AS CounterID, + count() AS hits + FROM test.hits + GROUP BY CounterID +) ANY LEFT JOIN +( + SELECT + (CounterID % 100000) AS CounterID, + sum(Sign) AS visits + FROM test.visits + GROUP BY CounterID + HAVING visits > 0 +) USING CounterID +WHERE hits = 0 OR visits = 0 +ORDER BY + hits + visits * 10 DESC, + CounterID ASC +LIMIT 20 +SETTINGS any_join_distinct_right_table_keys = 1, joined_subquery_requires_alias = 0; + + +SELECT + CounterID, + hits, + visits +FROM +( + SELECT + (CounterID % 100000) AS CounterID, + count() AS hits + FROM test.hits + GROUP BY CounterID +) ANY RIGHT JOIN +( + SELECT + (CounterID % 100000) AS CounterID, + sum(Sign) AS visits + FROM test.visits + GROUP BY CounterID + HAVING visits > 0 +) USING CounterID +WHERE hits = 0 OR visits = 0 +ORDER BY + hits + visits * 10 DESC, + CounterID ASC +LIMIT 20 +SETTINGS any_join_distinct_right_table_keys = 1, joined_subquery_requires_alias = 0; + + +SELECT + CounterID, + hits, + visits +FROM +( + SELECT + (CounterID % 100000) AS CounterID, + count() AS hits + FROM test.hits + GROUP BY CounterID +) ANY INNER JOIN +( + SELECT + (CounterID % 100000) AS CounterID, + sum(Sign) AS visits + FROM test.visits + GROUP BY CounterID + HAVING visits > 0 +) USING CounterID +WHERE hits = 0 OR visits = 0 +ORDER BY + hits + visits * 10 DESC, + CounterID ASC +LIMIT 20 +SETTINGS any_join_distinct_right_table_keys = 1, joined_subquery_requires_alias = 0; + +SELECT UserID, EventTime::DateTime('Asia/Dubai'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; + +SELECT UserID, EventTime::DateTime('Asia/Dubai'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits LEFT ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; + +SELECT a.*, b.* FROM +( + SELECT number AS k FROM system.numbers LIMIT 10 +) AS a +ANY INNER JOIN +( + SELECT number * 2 AS k, number AS joined FROM system.numbers LIMIT 10 +) AS b +USING k +SETTINGS any_join_distinct_right_table_keys = 1; + +SELECT a.*, b.* FROM +( + SELECT number AS k FROM system.numbers LIMIT 10 +) AS a +ALL INNER JOIN +( + SELECT intDiv(number, 2) AS k, number AS joined FROM system.numbers LIMIT 10 +) AS b +USING k; From 9dfe2a6682c15383719c86f5e364d50888ab8615 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 19 Apr 2022 16:53:24 +0800 Subject: [PATCH 016/106] fixed header style --- src/Interpreters/ConcurrentHashJoin.cpp | 34 ++++++++++++------------- src/Interpreters/ConcurrentHashJoin.h | 16 ++++++------ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index b397aec41e5..78a41427c45 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -1,24 +1,24 @@ #include #include -#include -#include -#include "Columns/FilterDescription.h" -#include "Columns/IColumn.h" -#include "Core/ColumnsWithTypeAndName.h" -#include "Core/NamesAndTypes.h" -#include "IO/WriteBufferFromString.h" -#include "Interpreters/ActionsDAG.h" -#include "Interpreters/ActionsVisitor.h" -#include "Interpreters/ExpressionActions.h" -#include "Interpreters/PreparedSets.h" -#include "Interpreters/SubqueryForSet.h" -#include "Parsers/DumpASTNode.h" -#include "Parsers/ExpressionListParsers.h" -#include "Parsers/IAST_fwd.h" -#include "Parsers/parseQuery.h" +#include +#include +#include +#include #include +#include +#include +#include +#include #include -#include "Common/Exception.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { namespace ErrorCodes diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index a93a1ab0ab5..bfac9b61eb2 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -3,15 +3,15 @@ #include #include #include -#include -#include "Core/BackgroundSchedulePool.h" -#include "Functions/FunctionsLogical.h" -#include "Interpreters/ExpressionActions.h" -#include "Interpreters/IJoin.h" -#include "base/defines.h" -#include "base/types.h" -#include +#include +#include #include +#include +#include +#include +#include +#include +#include namespace DB { namespace JoinStuff From 3f2fe3ff6631eb00bd75fe2b1e3563f4132ce7a0 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 19 Apr 2022 18:47:29 +0800 Subject: [PATCH 017/106] fixed code style --- src/Core/Settings.h | 2 +- src/Interpreters/ConcurrentHashJoin.cpp | 5 +---- src/Interpreters/TableJoin.cpp | 4 ++-- src/Interpreters/TableJoin.h | 2 +- src/QueryPipeline/QueryPipelineBuilder.cpp | 2 -- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2093333375e..2e6b9aeffff 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -351,7 +351,7 @@ class IColumn; M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \ M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \ M(UInt64, join_on_disk_max_files_to_merge, 64, "For MergeJoin on disk set how much files it's allowed to sort simultaneously. Then this value bigger then more memory used and then less disk I/O needed. Minimum is 2.", 0) \ - M(Bool, enable_parallel_join, false, "Enable paralle join algorithm.", 0)\ + M(Bool, enable_parallel_hash_join, false, "Enable paralle join algorithm.", 0)\ M(String, temporary_files_codec, "LZ4", "Set compression codec for temporary files (sort and join on disk). I.e. LZ4, NONE.", 0) \ \ M(UInt64, max_rows_to_transfer, 0, "Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.", 0) \ diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 78a41427c45..4b551bc7de6 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -123,10 +123,7 @@ void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr & size_t i = 0; for (auto & name_and_type : names_and_types) { - ColumnPtr col_ptr = std::move(mutable_final_columns[i]); - mutable_final_columns[i] = nullptr; - ColumnWithTypeAndName col(col_ptr, name_and_type.type, name_and_type.name); - final_columns.emplace_back(col); + final_columns.emplace_back(ColumnWithTypeAndName(std::move(mutable_final_columns[i]), name_and_type.type, name_and_type.name)); i += 1; } block = Block(final_columns); diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 1e6e7f56eed..8af21551054 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -107,7 +107,7 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_) , partial_merge_join_left_table_buffer_bytes(settings.partial_merge_join_left_table_buffer_bytes) , max_files_to_merge(settings.join_on_disk_max_files_to_merge) , temporary_files_codec(settings.temporary_files_codec) - , enable_parallel_join(settings.enable_parallel_join) + , enable_parallel_hash_join(settings.enable_parallel_hash_join) , tmp_volume(tmp_volume_) { } @@ -752,7 +752,7 @@ void TableJoin::resetToCross() bool TableJoin::allowConcurrentHashJoin() const { - if (!enable_parallel_join) + if (!enable_parallel_hash_join) return false; if (dictionary_reader || join_algorithm != JoinAlgorithm::HASH) return false; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index c7c59f69029..1d9ddc69a31 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -112,7 +112,7 @@ private: const size_t partial_merge_join_left_table_buffer_bytes = 0; const size_t max_files_to_merge = 0; const String temporary_files_codec = "LZ4"; - const bool enable_parallel_join = false; + const bool enable_parallel_hash_join = false; /// the limit has no technical reasons, it supposed to improve safety const size_t MAX_DISJUNCTS = 16; /// NOLINT diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 91a6346d90a..f9d0e276e27 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -340,7 +340,6 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( size_t num_streams = left->getNumStreams() < max_streams ? max_streams : left->getNumStreams(); left->resize(num_streams); - num_streams = left->getNumStreams(); if (join->supportParallelJoin() && !right->hasTotals()) { @@ -361,7 +360,6 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( } else { - LOG_TRACE(&Poco::Logger::get("QueryPipelineBuilder"), "run in single thread on right loading"); right->resize(1); auto adding_joined = std::make_shared(right->getHeader(), join); From 26ff749a32a6d85db104ce03b5b6fab4ee1e6cbd Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 19 Apr 2022 19:12:08 +0800 Subject: [PATCH 018/106] remove unused header --- src/Interpreters/TableJoin.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 8af21551054..5c034498cda 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -3,7 +3,6 @@ #include #include #include -#include "Parsers/ASTTablesInSelectQuery.h" #include #include From a5dd01edf34957705de5adbc6966441f59826e84 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 20 Apr 2022 09:53:09 +0800 Subject: [PATCH 019/106] update fast test scripts --- .../02236_explain_pipeline_join.reference | 24 ++++++++++--------- .../02236_explain_pipeline_join.sql | 3 ++- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02236_explain_pipeline_join.reference b/tests/queries/0_stateless/02236_explain_pipeline_join.reference index ed993e2a1e7..1c1a7594a05 100644 --- a/tests/queries/0_stateless/02236_explain_pipeline_join.reference +++ b/tests/queries/0_stateless/02236_explain_pipeline_join.reference @@ -1,19 +1,21 @@ (Expression) -ExpressionTransform +ExpressionTransform × 16 (Join) - JoiningTransform 2 → 1 - (Expression) - ExpressionTransform - (SettingQuotaAndLimits) - (Limit) - Limit - (ReadFromStorage) - Numbers 0 → 1 - (Expression) - FillingRightJoinSide + JoiningTransform × 16 2 → 1 + Resize 1 → 16 + (Expression) ExpressionTransform (SettingQuotaAndLimits) (Limit) Limit (ReadFromStorage) Numbers 0 → 1 + (Expression) + Resize 1 → 16 + FillingRightJoinSide + ExpressionTransform + (SettingQuotaAndLimits) + (Limit) + Limit + (ReadFromStorage) + Numbers 0 → 1 \ No newline at end of file diff --git a/tests/queries/0_stateless/02236_explain_pipeline_join.sql b/tests/queries/0_stateless/02236_explain_pipeline_join.sql index de885ed74ee..7a92d6bfde0 100644 --- a/tests/queries/0_stateless/02236_explain_pipeline_join.sql +++ b/tests/queries/0_stateless/02236_explain_pipeline_join.sql @@ -7,4 +7,5 @@ ALL LEFT JOIN ( SELECT * FROM system.numbers LIMIT 10 ) t2 -USING number; +USING number +SETTINGS max_threads=16; From e76d3efb55a9a78441a9286ace8b0eea1962015f Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 20 Apr 2022 10:17:17 +0800 Subject: [PATCH 020/106] update fast test scripts --- tests/queries/0_stateless/01911_logical_error_minus.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01911_logical_error_minus.sql b/tests/queries/0_stateless/01911_logical_error_minus.sql index ac69f553e89..9813c1a8a5d 100644 --- a/tests/queries/0_stateless/01911_logical_error_minus.sql +++ b/tests/queries/0_stateless/01911_logical_error_minus.sql @@ -26,7 +26,7 @@ INSERT INTO codecTest (key, name, ref_valueF64, valueF64, ref_valueF32, valueF32 SELECT IF(-2, NULL, 0.00009999999747378752), IF(1048577, 1048576, NULL), c1.key, IF(1, NULL, NULL), c2.key FROM codecTest AS c1 , codecTest AS c2 WHERE ignore(IF(257, -2, NULL), arrayJoin([65537]), IF(3, 1024, 9223372036854775807)) AND IF(NULL, 256, NULL) AND (IF(NULL, '1048576', NULL) = (c1.key - NULL)) LIMIT 65535; -SELECT c1.key, c1.name, c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64, '', c2.key, c2.ref_valueF64 FROM codecTest AS c1 , codecTest AS c2 WHERE (dF64 != 3) AND c1.valueF64 AND (c2.key = (c1.key - 1048576)) LIMIT 0; +SELECT c1.key, c1.name, c1.ref_valueF64, c1.valueF64, c1.ref_valueF64 - c1.valueF64 AS dF64, '', c2.key, c2.ref_valueF64 FROM codecTest AS c1 , codecTest AS c2 WHERE (dF64 != 3) AND c1.valueF64 != 0 AND (c2.key = (c1.key - 1048576)) LIMIT 0; DROP TABLE codecTest; From 1291098fb637ee86f6e88e548fa22d8beb96ff0f Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 20 Apr 2022 11:40:02 +0800 Subject: [PATCH 021/106] update fast test scripts --- tests/queries/0_stateless/02236_explain_pipeline_join.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02236_explain_pipeline_join.reference b/tests/queries/0_stateless/02236_explain_pipeline_join.reference index 1c1a7594a05..ad54708c9be 100644 --- a/tests/queries/0_stateless/02236_explain_pipeline_join.reference +++ b/tests/queries/0_stateless/02236_explain_pipeline_join.reference @@ -18,4 +18,4 @@ ExpressionTransform × 16 (Limit) Limit (ReadFromStorage) - Numbers 0 → 1 \ No newline at end of file + Numbers 0 → 1 From 95dd07f0b3d9fc39b6099758aed120361212a531 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 20 Apr 2022 14:03:35 +0800 Subject: [PATCH 022/106] update fast test scripts --- tests/queries/1_stateful/00172_parallel_join.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/1_stateful/00172_parallel_join.sql b/tests/queries/1_stateful/00172_parallel_join.sql index 809c5d5e065..dca546a65a6 100644 --- a/tests/queries/1_stateful/00172_parallel_join.sql +++ b/tests/queries/1_stateful/00172_parallel_join.sql @@ -1,4 +1,4 @@ -set enable_parallel_join=true; +set enable_parallel_hash_join=true; SELECT EventDate, hits, @@ -38,7 +38,7 @@ FROM test.hits ANY LEFT JOIN GROUP BY EventDate ORDER BY hits DESC LIMIT 10 -SETTINGS joined_subquery_requires_alias = 0, enable_parallel_join=true; +SETTINGS joined_subquery_requires_alias = 0, enable_parallel_hash_join=true; SELECT From 09bfcb0dedc320b4759140cd64a480abe37ef63d Mon Sep 17 00:00:00 2001 From: lgbo-usstc Date: Wed, 20 Apr 2022 16:39:31 +0800 Subject: [PATCH 023/106] update test scripts --- .../1_stateful/00172_parallel_join.reference | 400 +++++++++--------- .../1_stateful/00172_parallel_join.sql | 4 +- 2 files changed, 202 insertions(+), 202 deletions(-) diff --git a/tests/queries/1_stateful/00172_parallel_join.reference b/tests/queries/1_stateful/00172_parallel_join.reference index 81e6cb3efd8..9ba73024fa2 100644 --- a/tests/queries/1_stateful/00172_parallel_join.reference +++ b/tests/queries/1_stateful/00172_parallel_join.reference @@ -82,206 +82,206 @@ hurpass.com 49671 1251 28456 0 3746 19008 0 3426 81009 0 3158 -2961521519262 2014-03-19 00:12:45 gen_time ['gen_time'] -2961521519262 2014-03-19 00:12:45 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] -2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] -2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] -2961521519262 2014-03-19 00:12:45 Роль пользователя ['Роль пользователя'] -2961521519262 2014-03-19 00:12:46 videoid ['videoid'] -2961521519262 2014-03-19 00:12:46 videoid Done ['videoid'] -2961521519262 2014-03-19 00:12:46 Поиск ['Поиск'] -2961521519262 2014-03-19 00:12:47 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -2961521519262 2014-03-19 00:12:47 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -2961521519262 2014-03-19 00:12:51 videoid 8950vvvv ['videoid'] -2961521519262 2014-03-19 00:14:11 errors SMS ['errors'] -2961521519262 2014-03-19 00:14:12 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -2961521519262 2014-03-19 00:15:11 videoid Done ['videoid'] -2961521519262 2014-03-19 00:15:12 videoid ['videoid'] -2961521519262 2014-03-19 00:15:12 Поиск ['Поиск'] -2961521519262 2014-03-19 00:15:12 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -2961521519262 2014-03-19 00:15:17 videoid 8950vvvv ['videoid'] -2961521519262 2014-03-19 00:19:21 gen_timestamp 564.815 ['gen_timestamp'] -2961521519262 2014-03-19 00:19:22 gen_timestamp 564.815 ['gen_timestamp'] -164599821266083 2014-03-22 10:30:50 gen_time views ['gen_time'] -164599821266083 2014-03-22 10:30:50 gen_timestamp vkontakte,face element ['gen_timestamp'] -164599821266083 2014-03-22 10:30:50 Платность Превьюшки ['Платность'] -164599821266083 2014-03-22 10:30:53 Аттачи в списке /pages/biblio ['Аттачи в списке'] -164599821266083 2014-03-22 10:31:02 Платность Превьюшки ['Платность'] -164599821266083 2014-03-22 10:31:05 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -164599821266083 2014-03-22 10:31:06 Поиск Счетчика ['Поиск'] -335386658089392 2014-03-18 04:28:17 gen_time ['gen_time'] -335386658089392 2014-03-18 04:28:18 Роль пользователя ['Роль пользователя'] -335386658089392 2014-03-18 04:28:25 Поиск ['Поиск'] -335386658089392 2014-03-18 04:28:26 videoid ['videoid'] -335386658089392 2014-03-18 04:28:26 videoid Done ['videoid'] -335386658089392 2014-03-18 04:28:26 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -335386658089392 2014-03-18 05:48:43 gen_time ['gen_time'] -335386658089392 2014-03-18 05:48:43 Роль пользователя ['Роль пользователя'] -335386658089392 2014-03-18 05:48:44 Дра ['Дра'] -419794772979101 2014-03-17 22:08:42 gen_time ['gen_time'] -419794772979101 2014-03-17 22:08:43 Роль пользователя ['Роль пользователя'] -419794772979101 2014-03-17 22:08:50 Поиск ['Поиск'] -419794772979101 2014-03-17 22:08:50 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -419794772979101 2014-03-19 14:26:49 gen_time ['gen_time'] -419794772979101 2014-03-19 14:26:49 Дра ['Дра'] -419794772979101 2014-03-19 14:26:49 Эксперимент про Счетчик есть null ['Эксперимент про'] -419794772979101 2014-03-19 14:26:52 Поиск ['Поиск'] -419794772979101 2014-03-19 14:26:52 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -818136115259082 2014-03-18 11:33:51 gen_time ['gen_time'] -818136115259082 2014-03-18 11:33:51 Дра ['Дра'] -818136115259082 2014-03-18 11:33:52 Роль пользователя ['Роль пользователя'] -818136115259082 2014-03-18 11:33:55 Поиск ['Поиск'] -818136115259082 2014-03-18 11:33:55 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -818136115259082 2014-03-18 11:34:42 gen_timestamp 564.815 ['gen_timestamp'] -818136115259082 2014-03-18 11:34:44 gen_timestamp 564.815 ['gen_timestamp'] -870023587671569 2014-03-16 22:17:44 gen_time ['gen_time'] -870023587671569 2014-03-16 22:17:44 gen_timestamp vkontakte,face element ['gen_timestamp'] -1391426411567902 2014-03-18 10:39:27 gen_time ['gen_time'] -1391426411567902 2014-03-18 10:39:27 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -1391426411567902 2014-03-18 10:39:27 Платность model ['Платность'] -1391426411567902 2014-03-18 10:39:27 Эксперимент про Счетчик есть null ['Эксперимент про'] -1391426411567902 2014-03-18 10:39:37 videoid ['videoid'] -1391426411567902 2014-03-18 10:39:37 videoid Done ['videoid'] -1391426411567902 2014-03-18 10:39:37 Поиск ['Поиск'] -1391426411567902 2014-03-18 10:39:37 Поиск ['Поиск'] -1391426411567902 2014-03-18 10:39:37 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -1391426411567902 2014-03-18 10:39:37 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -1391426411567902 2014-03-18 10:40:07 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -1391426411567902 2014-03-18 10:43:42 Toolbar auto 25%;\n\t\t\t\t \n \n return ['Toolbar'] -1391426411567902 2014-03-18 10:43:42 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -1391426411567902 2014-03-18 10:43:42 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -1391426411567902 2014-03-18 10:43:42 Права ['Права'] -1391426411567902 2014-03-18 10:43:44 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -1391426411567902 2014-03-18 10:43:48 Toolbar Счетчик ['Toolbar'] -1391426411567902 2014-03-18 10:43:48 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -1391426411567902 2014-03-18 10:43:48 Поиск ['Поиск'] -1391426411567902 2014-03-18 10:43:48 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -1560113638322066 2014-03-17 13:21:19 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 13:21:19 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 13:28:21 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 13:28:22 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 13:28:22 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 17:01:54 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 17:15:06 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 17:15:09 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 17:15:09 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 17:22:15 errors 1e521117369b39edaa078c8021df25 ['errors'] -1560113638322066 2014-03-17 17:22:15 gen_time ['gen_time'] -1560113638322066 2014-03-17 17:22:15 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 17:22:15 Дра ['Дра'] -1560113638322066 2014-03-17 17:22:15 Поиск ['Поиск'] -1560113638322066 2014-03-17 17:22:16 Toolbar Счетчик ['Toolbar'] -1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] -1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] -1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] -1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] -1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] -1560113638322066 2014-03-17 17:22:16 errors 1e521117369b39edaa078c8021df25 ['errors'] -1560113638322066 2014-03-17 17:22:16 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -1560113638322066 2014-03-17 17:22:16 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -1560113638322066 2014-03-17 17:22:42 hasTemplates Счетчик есть null ['hasTemplates'] -1560113638322066 2014-03-17 17:22:46 errors SMS ['errors'] -2961521519262 2014-03-19 00:12:45 [] -2961521519262 2014-03-19 00:12:45 gen_time ['gen_time'] -2961521519262 2014-03-19 00:12:45 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] -2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] -2961521519262 2014-03-19 00:12:45 hasTemplates Счетчик есть null ['hasTemplates'] -2961521519262 2014-03-19 00:12:45 Роль пользователя ['Роль пользователя'] -2961521519262 2014-03-19 00:12:46 videoid ['videoid'] -2961521519262 2014-03-19 00:12:46 videoid Done ['videoid'] -2961521519262 2014-03-19 00:12:46 Поиск ['Поиск'] -2961521519262 2014-03-19 00:12:47 [] -2961521519262 2014-03-19 00:12:47 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -2961521519262 2014-03-19 00:12:47 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -2961521519262 2014-03-19 00:12:51 [] -2961521519262 2014-03-19 00:12:51 videoid 8950vvvv ['videoid'] -2961521519262 2014-03-19 00:13:28 [] -2961521519262 2014-03-19 00:14:11 errors SMS ['errors'] -2961521519262 2014-03-19 00:14:12 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -2961521519262 2014-03-19 00:15:11 videoid Done ['videoid'] -2961521519262 2014-03-19 00:15:12 [] -2961521519262 2014-03-19 00:15:12 videoid ['videoid'] -2961521519262 2014-03-19 00:15:12 Поиск ['Поиск'] -2961521519262 2014-03-19 00:15:12 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -2961521519262 2014-03-19 00:15:17 [] -2961521519262 2014-03-19 00:15:17 videoid 8950vvvv ['videoid'] -2961521519262 2014-03-19 00:19:21 gen_timestamp 564.815 ['gen_timestamp'] -2961521519262 2014-03-19 00:19:22 gen_timestamp 564.815 ['gen_timestamp'] -164599821266083 2014-03-22 10:30:50 [] -164599821266083 2014-03-22 10:30:50 [] -164599821266083 2014-03-22 10:30:50 gen_time views ['gen_time'] -164599821266083 2014-03-22 10:30:50 gen_timestamp vkontakte,face element ['gen_timestamp'] -164599821266083 2014-03-22 10:30:50 Платность Превьюшки ['Платность'] -164599821266083 2014-03-22 10:30:53 Аттачи в списке /pages/biblio ['Аттачи в списке'] -164599821266083 2014-03-22 10:31:02 Платность Превьюшки ['Платность'] -164599821266083 2014-03-22 10:31:05 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -164599821266083 2014-03-22 10:31:06 [] -164599821266083 2014-03-22 10:31:06 Поиск Счетчика ['Поиск'] -335386658089392 2014-03-18 04:28:17 [] -335386658089392 2014-03-18 04:28:17 gen_time ['gen_time'] -335386658089392 2014-03-18 04:28:18 Роль пользователя ['Роль пользователя'] -335386658089392 2014-03-18 04:28:25 Поиск ['Поиск'] -335386658089392 2014-03-18 04:28:26 [] -335386658089392 2014-03-18 04:28:26 videoid ['videoid'] -335386658089392 2014-03-18 04:28:26 videoid Done ['videoid'] -335386658089392 2014-03-18 04:28:26 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -335386658089392 2014-03-18 04:28:31 [] -335386658089392 2014-03-18 04:28:31 [] -335386658089392 2014-03-18 05:48:43 [] -335386658089392 2014-03-18 05:48:43 gen_time ['gen_time'] -335386658089392 2014-03-18 05:48:43 Роль пользователя ['Роль пользователя'] -335386658089392 2014-03-18 05:48:44 Дра ['Дра'] -419794772979101 2014-03-17 22:08:42 [] -419794772979101 2014-03-17 22:08:42 gen_time ['gen_time'] -419794772979101 2014-03-17 22:08:43 Роль пользователя ['Роль пользователя'] -419794772979101 2014-03-17 22:08:50 Поиск ['Поиск'] -419794772979101 2014-03-17 22:08:50 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -419794772979101 2014-03-17 22:08:51 [] -419794772979101 2014-03-19 14:26:49 [] -419794772979101 2014-03-19 14:26:49 gen_time ['gen_time'] -419794772979101 2014-03-19 14:26:49 Дра ['Дра'] -419794772979101 2014-03-19 14:26:49 Эксперимент про Счетчик есть null ['Эксперимент про'] -419794772979101 2014-03-19 14:26:52 [] -419794772979101 2014-03-19 14:26:52 Поиск ['Поиск'] -419794772979101 2014-03-19 14:26:52 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -818136115259082 2014-03-18 11:33:51 [] -818136115259082 2014-03-18 11:33:51 gen_time ['gen_time'] -818136115259082 2014-03-18 11:33:51 Дра ['Дра'] -818136115259082 2014-03-18 11:33:52 Роль пользователя ['Роль пользователя'] -818136115259082 2014-03-18 11:33:55 Поиск ['Поиск'] -818136115259082 2014-03-18 11:33:55 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -818136115259082 2014-03-18 11:33:56 [] -818136115259082 2014-03-18 11:34:42 gen_timestamp 564.815 ['gen_timestamp'] -818136115259082 2014-03-18 11:34:44 [] -818136115259082 2014-03-18 11:34:44 gen_timestamp 564.815 ['gen_timestamp'] -870023587671569 2014-03-16 22:17:44 [] -870023587671569 2014-03-16 22:17:44 [] -870023587671569 2014-03-16 22:17:44 gen_time ['gen_time'] -870023587671569 2014-03-16 22:17:44 gen_timestamp vkontakte,face element ['gen_timestamp'] -870023587671569 2014-03-16 22:17:45 [] -870023587671569 2014-03-16 22:17:46 [] -1026297835113727 2014-03-17 18:41:53 [] -1026297835113727 2014-03-18 02:37:28 [] -1026297835113727 2014-03-19 18:26:53 [] -1026297835113727 2014-03-20 05:37:49 [] -1026297835113727 2014-03-20 05:49:38 [] -1026297835113727 2014-03-20 13:39:34 [] -1026297835113727 2014-03-21 07:38:39 [] -1026297835113727 2014-03-22 07:13:26 [] -1026297835113727 2014-03-22 10:50:25 [] -1391426411567902 2014-03-18 10:39:27 [] -1391426411567902 2014-03-18 10:39:27 gen_time ['gen_time'] -1391426411567902 2014-03-18 10:39:27 gen_timestamp Счетчика нет (10 ['gen_timestamp'] -1391426411567902 2014-03-18 10:39:27 Платность model ['Платность'] -1391426411567902 2014-03-18 10:39:27 Эксперимент про Счетчик есть null ['Эксперимент про'] -1391426411567902 2014-03-18 10:39:37 [] -1391426411567902 2014-03-18 10:39:37 videoid ['videoid'] -1391426411567902 2014-03-18 10:39:37 videoid Done ['videoid'] -1391426411567902 2014-03-18 10:39:37 Поиск ['Поиск'] -1391426411567902 2014-03-18 10:39:37 Поиск ['Поиск'] -1391426411567902 2014-03-18 10:39:37 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +2961521519262 gen_time ['gen_time'] +2961521519262 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 Роль пользователя ['Роль пользователя'] +2961521519262 videoid ['videoid'] +2961521519262 videoid Done ['videoid'] +2961521519262 Поиск ['Поиск'] +2961521519262 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +2961521519262 videoid 8950vvvv ['videoid'] +2961521519262 errors SMS ['errors'] +2961521519262 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 videoid Done ['videoid'] +2961521519262 videoid ['videoid'] +2961521519262 Поиск ['Поиск'] +2961521519262 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +2961521519262 videoid 8950vvvv ['videoid'] +2961521519262 gen_timestamp 564.815 ['gen_timestamp'] +2961521519262 gen_timestamp 564.815 ['gen_timestamp'] +164599821266083 gen_time views ['gen_time'] +164599821266083 gen_timestamp vkontakte,face element ['gen_timestamp'] +164599821266083 Платность Превьюшки ['Платность'] +164599821266083 Аттачи в списке /pages/biblio ['Аттачи в списке'] +164599821266083 Платность Превьюшки ['Платность'] +164599821266083 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +164599821266083 Поиск Счетчика ['Поиск'] +335386658089392 gen_time ['gen_time'] +335386658089392 Роль пользователя ['Роль пользователя'] +335386658089392 Поиск ['Поиск'] +335386658089392 videoid ['videoid'] +335386658089392 videoid Done ['videoid'] +335386658089392 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +335386658089392 gen_time ['gen_time'] +335386658089392 Роль пользователя ['Роль пользователя'] +335386658089392 Дра ['Дра'] +419794772979101 gen_time ['gen_time'] +419794772979101 Роль пользователя ['Роль пользователя'] +419794772979101 Поиск ['Поиск'] +419794772979101 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +419794772979101 gen_time ['gen_time'] +419794772979101 Дра ['Дра'] +419794772979101 Эксперимент про Счетчик есть null ['Эксперимент про'] +419794772979101 Поиск ['Поиск'] +419794772979101 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +818136115259082 gen_time ['gen_time'] +818136115259082 Дра ['Дра'] +818136115259082 Роль пользователя ['Роль пользователя'] +818136115259082 Поиск ['Поиск'] +818136115259082 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +818136115259082 gen_timestamp 564.815 ['gen_timestamp'] +818136115259082 gen_timestamp 564.815 ['gen_timestamp'] +870023587671569 gen_time ['gen_time'] +870023587671569 gen_timestamp vkontakte,face element ['gen_timestamp'] +1391426411567902 gen_time ['gen_time'] +1391426411567902 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 Платность model ['Платность'] +1391426411567902 Эксперимент про Счетчик есть null ['Эксперимент про'] +1391426411567902 videoid ['videoid'] +1391426411567902 videoid Done ['videoid'] +1391426411567902 Поиск ['Поиск'] +1391426411567902 Поиск ['Поиск'] +1391426411567902 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +1391426411567902 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +1391426411567902 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 Toolbar auto 25%;\n\t\t\t\t \n \n return ['Toolbar'] +1391426411567902 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 Права ['Права'] +1391426411567902 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 Toolbar Счетчик ['Toolbar'] +1391426411567902 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 Поиск ['Поиск'] +1391426411567902 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 gen_time ['gen_time'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 Дра ['Дра'] +1560113638322066 Поиск ['Поиск'] +1560113638322066 Toolbar Счетчик ['Toolbar'] +1560113638322066 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 errors 1e521117369b39edaa078c8021df25 ['errors'] +1560113638322066 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +1560113638322066 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +1560113638322066 hasTemplates Счетчик есть null ['hasTemplates'] +1560113638322066 errors SMS ['errors'] +2961521519262 [] +2961521519262 gen_time ['gen_time'] +2961521519262 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 hasTemplates Счетчик есть null ['hasTemplates'] +2961521519262 Роль пользователя ['Роль пользователя'] +2961521519262 videoid ['videoid'] +2961521519262 videoid Done ['videoid'] +2961521519262 Поиск ['Поиск'] +2961521519262 [] +2961521519262 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +2961521519262 [] +2961521519262 videoid 8950vvvv ['videoid'] +2961521519262 [] +2961521519262 errors SMS ['errors'] +2961521519262 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +2961521519262 videoid Done ['videoid'] +2961521519262 [] +2961521519262 videoid ['videoid'] +2961521519262 Поиск ['Поиск'] +2961521519262 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +2961521519262 [] +2961521519262 videoid 8950vvvv ['videoid'] +2961521519262 gen_timestamp 564.815 ['gen_timestamp'] +2961521519262 gen_timestamp 564.815 ['gen_timestamp'] +164599821266083 [] +164599821266083 [] +164599821266083 gen_time views ['gen_time'] +164599821266083 gen_timestamp vkontakte,face element ['gen_timestamp'] +164599821266083 Платность Превьюшки ['Платность'] +164599821266083 Аттачи в списке /pages/biblio ['Аттачи в списке'] +164599821266083 Платность Превьюшки ['Платность'] +164599821266083 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +164599821266083 [] +164599821266083 Поиск Счетчика ['Поиск'] +335386658089392 [] +335386658089392 gen_time ['gen_time'] +335386658089392 Роль пользователя ['Роль пользователя'] +335386658089392 Поиск ['Поиск'] +335386658089392 [] +335386658089392 videoid ['videoid'] +335386658089392 videoid Done ['videoid'] +335386658089392 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +335386658089392 [] +335386658089392 [] +335386658089392 [] +335386658089392 gen_time ['gen_time'] +335386658089392 Роль пользователя ['Роль пользователя'] +335386658089392 Дра ['Дра'] +419794772979101 [] +419794772979101 gen_time ['gen_time'] +419794772979101 Роль пользователя ['Роль пользователя'] +419794772979101 Поиск ['Поиск'] +419794772979101 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +419794772979101 [] +419794772979101 [] +419794772979101 gen_time ['gen_time'] +419794772979101 Дра ['Дра'] +419794772979101 Эксперимент про Счетчик есть null ['Эксперимент про'] +419794772979101 [] +419794772979101 Поиск ['Поиск'] +419794772979101 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +818136115259082 [] +818136115259082 gen_time ['gen_time'] +818136115259082 Дра ['Дра'] +818136115259082 Роль пользователя ['Роль пользователя'] +818136115259082 Поиск ['Поиск'] +818136115259082 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] +818136115259082 [] +818136115259082 gen_timestamp 564.815 ['gen_timestamp'] +818136115259082 [] +818136115259082 gen_timestamp 564.815 ['gen_timestamp'] +870023587671569 [] +870023587671569 [] +870023587671569 gen_time ['gen_time'] +870023587671569 gen_timestamp vkontakte,face element ['gen_timestamp'] +870023587671569 [] +870023587671569 [] +1026297835113727 [] +1026297835113727 [] +1026297835113727 [] +1026297835113727 [] +1026297835113727 [] +1026297835113727 [] +1026297835113727 [] +1026297835113727 [] +1026297835113727 [] +1391426411567902 [] +1391426411567902 gen_time ['gen_time'] +1391426411567902 gen_timestamp Счетчика нет (10 ['gen_timestamp'] +1391426411567902 Платность model ['Платность'] +1391426411567902 Эксперимент про Счетчик есть null ['Эксперимент про'] +1391426411567902 [] +1391426411567902 videoid ['videoid'] +1391426411567902 videoid Done ['videoid'] +1391426411567902 Поиск ['Поиск'] +1391426411567902 Поиск ['Поиск'] +1391426411567902 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] 4 4 2 0 0 0 8 8 4 diff --git a/tests/queries/1_stateful/00172_parallel_join.sql b/tests/queries/1_stateful/00172_parallel_join.sql index dca546a65a6..ce278e7bfc0 100644 --- a/tests/queries/1_stateful/00172_parallel_join.sql +++ b/tests/queries/1_stateful/00172_parallel_join.sql @@ -178,9 +178,9 @@ ORDER BY LIMIT 20 SETTINGS any_join_distinct_right_table_keys = 1, joined_subquery_requires_alias = 0; -SELECT UserID, EventTime::DateTime('Asia/Dubai'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; +SELECT UserID, pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; -SELECT UserID, EventTime::DateTime('Asia/Dubai'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits LEFT ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; +SELECT UserID, pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits LEFT ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; SELECT a.*, b.* FROM ( From 9e364912d81eb1dc0fddfbcaf33c52e840c9f820 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 20 Apr 2022 17:01:02 +0800 Subject: [PATCH 024/106] update Settings comment --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2e6b9aeffff..b5e40d2cf03 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -351,7 +351,7 @@ class IColumn; M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \ M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \ M(UInt64, join_on_disk_max_files_to_merge, 64, "For MergeJoin on disk set how much files it's allowed to sort simultaneously. Then this value bigger then more memory used and then less disk I/O needed. Minimum is 2.", 0) \ - M(Bool, enable_parallel_hash_join, false, "Enable paralle join algorithm.", 0)\ + M(Bool, enable_parallel_hash_join, false, "Enable parallel join algorithm.", 0)\ M(String, temporary_files_codec, "LZ4", "Set compression codec for temporary files (sort and join on disk). I.e. LZ4, NONE.", 0) \ \ M(UInt64, max_rows_to_transfer, 0, "Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.", 0) \ From c844415578cd64dceafcce0073669e4bc031f909 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 20 Apr 2022 19:47:16 +0800 Subject: [PATCH 025/106] add new join_algorithm `parallel_hash` --- src/Core/Settings.h | 3 +-- src/Core/SettingsEnums.cpp | 3 ++- src/Core/SettingsEnums.h | 1 + src/Interpreters/ConcurrentHashJoin.cpp | 13 +++++++------ src/Interpreters/ConcurrentHashJoin.h | 7 +++---- src/Interpreters/TableJoin.cpp | 5 +---- src/Interpreters/TableJoin.h | 3 +-- src/QueryPipeline/QueryPipelineBuilder.cpp | 4 +++- tests/queries/1_stateful/00172_parallel_join.sql | 4 ++-- 9 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b5e40d2cf03..d64e5b10d5b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -346,12 +346,11 @@ class IColumn; M(UInt64, max_bytes_in_join, 0, "Maximum size of the hash table for JOIN (in number of bytes in memory).", 0) \ M(OverflowMode, join_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \ M(Bool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.", IMPORTANT) \ - M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \ + M(JoinAlgorithm, join_algorithm, JoinAlgorithm::HASH, "Specify join algorithm: 'auto', 'hash', 'partial_merge', 'prefer_partial_merge', 'parallel_hash'. 'auto' tries to change HashJoin to MergeJoin on the fly to avoid out of memory.", 0) \ M(UInt64, default_max_bytes_in_join, 1000000000, "Maximum size of right-side table if limit is required but max_bytes_in_join is not set.", 0) \ M(UInt64, partial_merge_join_left_table_buffer_bytes, 0, "If not 0 group left table blocks in bigger ones for left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.", 0) \ M(UInt64, partial_merge_join_rows_in_right_blocks, 65536, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.", 0) \ M(UInt64, join_on_disk_max_files_to_merge, 64, "For MergeJoin on disk set how much files it's allowed to sort simultaneously. Then this value bigger then more memory used and then less disk I/O needed. Minimum is 2.", 0) \ - M(Bool, enable_parallel_hash_join, false, "Enable parallel join algorithm.", 0)\ M(String, temporary_files_codec, "LZ4", "Set compression codec for temporary files (sort and join on disk). I.e. LZ4, NONE.", 0) \ \ M(UInt64, max_rows_to_transfer, 0, "Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 3f68038560c..a37c1e9be86 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -34,7 +34,8 @@ IMPLEMENT_SETTING_ENUM(JoinAlgorithm, ErrorCodes::UNKNOWN_JOIN, {{"auto", JoinAlgorithm::AUTO}, {"hash", JoinAlgorithm::HASH}, {"partial_merge", JoinAlgorithm::PARTIAL_MERGE}, - {"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE}}) + {"prefer_partial_merge", JoinAlgorithm::PREFER_PARTIAL_MERGE}, + {"parallel_hash", JoinAlgorithm::PARALLEL_HASH}}) IMPLEMENT_SETTING_ENUM(TotalsMode, ErrorCodes::UNKNOWN_TOTALS_MODE, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 7fe54c12665..08091da6d6c 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -42,6 +42,7 @@ enum class JoinAlgorithm HASH, PARTIAL_MERGE, PREFER_PARTIAL_MERGE, + PARALLEL_HASH, }; DECLARE_SETTING_ENUM(JoinAlgorithm) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 4b551bc7de6..3c72c5963bc 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -59,10 +59,11 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) { auto & hash_join = hash_joins[i]; auto & dispatched_block = dispatched_blocks[i]; - std::unique_lock lock(hash_join->mutex); - hash_join->rows += dispatched_block.rows(); - check_total_rows += dispatched_block.rows(); + auto rows = dispatched_block.rows(); + check_total_rows += rows; check_total_bytes += dispatched_block.bytes(); + std::unique_lock lock(hash_join->mutex); + // Don't take the real insertion here, because inserting a block into HashTable is a time-consuming operation, // it may cause serious lock contention and make the whole process slow. hash_join->pending_right_blocks.emplace_back(std::move(dispatched_block)); @@ -283,7 +284,7 @@ void ConcurrentHashJoin::waitAllAddJoinedBlocksFinished() std::shared_ptr hash_join; { std::unique_lock lock(finished_add_joined_blocks_tasks_mutex); - hash_join = getUnfinishedAddJoinedBlockTaks(); + hash_join = getUnfinishedAddJoinedBlockTasks(); if (!hash_join) { while (finished_add_joined_blocks_tasks < hash_joins.size()) @@ -293,7 +294,7 @@ void ConcurrentHashJoin::waitAllAddJoinedBlocksFinished() return; } } - std::unique_lock lock(hash_join->mutex); + while (!hash_join->pending_right_blocks.empty()) { Block & block = hash_join->pending_right_blocks.front(); @@ -305,7 +306,7 @@ void ConcurrentHashJoin::waitAllAddJoinedBlocksFinished() } } -std::shared_ptr ConcurrentHashJoin::getUnfinishedAddJoinedBlockTaks() +std::shared_ptr ConcurrentHashJoin::getUnfinishedAddJoinedBlockTasks() { for (auto & hash_join : hash_joins) { diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index bfac9b61eb2..35ac29c213f 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -12,6 +12,7 @@ #include #include #include + namespace DB { namespace JoinStuff @@ -54,9 +55,7 @@ private: std::mutex mutex; std::unique_ptr data; std::list pending_right_blocks; - std::atomic in_inserting = false; - std::atomic rows = 0; - + bool in_inserting = false; }; ContextPtr context; std::shared_ptr table_join; @@ -98,7 +97,7 @@ private: static void dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, std::vector & dispatched_blocks); void waitAllAddJoinedBlocksFinished(); - std::shared_ptr getUnfinishedAddJoinedBlockTaks(); + std::shared_ptr getUnfinishedAddJoinedBlockTasks(); }; } diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 5c034498cda..ed882c0fd16 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -106,7 +106,6 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_) , partial_merge_join_left_table_buffer_bytes(settings.partial_merge_join_left_table_buffer_bytes) , max_files_to_merge(settings.join_on_disk_max_files_to_merge) , temporary_files_codec(settings.temporary_files_codec) - , enable_parallel_hash_join(settings.enable_parallel_hash_join) , tmp_volume(tmp_volume_) { } @@ -751,9 +750,7 @@ void TableJoin::resetToCross() bool TableJoin::allowConcurrentHashJoin() const { - if (!enable_parallel_hash_join) - return false; - if (dictionary_reader || join_algorithm != JoinAlgorithm::HASH) + if (dictionary_reader || join_algorithm != JoinAlgorithm::PARALLEL_HASH) return false; if (table_join.kind != ASTTableJoin::Kind::Left && table_join.kind != ASTTableJoin::Kind::Inner) return false; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 1d9ddc69a31..d990cadbcca 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -112,7 +112,6 @@ private: const size_t partial_merge_join_left_table_buffer_bytes = 0; const size_t max_files_to_merge = 0; const String temporary_files_codec = "LZ4"; - const bool enable_parallel_hash_join = false; /// the limit has no technical reasons, it supposed to improve safety const size_t MAX_DISJUNCTS = 16; /// NOLINT @@ -196,7 +195,7 @@ public: bool forceHashJoin() const { /// HashJoin always used for DictJoin - return dictionary_reader || join_algorithm == JoinAlgorithm::HASH; + return dictionary_reader || join_algorithm == JoinAlgorithm::HASH || join_algorithm == JoinAlgorithm::PARALLEL_HASH; } bool forceNullableRight() const { return join_use_nulls && isLeftOrFull(table_join.kind); } diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index f9d0e276e27..ed64246b164 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -338,7 +338,9 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( /// ╞> FillingJoin ─> Resize ╣ ╞> Joining ─> (totals) /// (totals) ─────────┘ ╙─────┘ - size_t num_streams = left->getNumStreams() < max_streams ? max_streams : left->getNumStreams(); + // In some cases, left's streams is too smaller then max_streams. Keep it same as max_streams + // to make full use of cpu. + auto & num_streams = max_streams; left->resize(num_streams); if (join->supportParallelJoin() && !right->hasTotals()) diff --git a/tests/queries/1_stateful/00172_parallel_join.sql b/tests/queries/1_stateful/00172_parallel_join.sql index ce278e7bfc0..b021b040dc2 100644 --- a/tests/queries/1_stateful/00172_parallel_join.sql +++ b/tests/queries/1_stateful/00172_parallel_join.sql @@ -1,4 +1,4 @@ -set enable_parallel_hash_join=true; +set join_algorithm='parallel_hash'; SELECT EventDate, hits, @@ -38,7 +38,7 @@ FROM test.hits ANY LEFT JOIN GROUP BY EventDate ORDER BY hits DESC LIMIT 10 -SETTINGS joined_subquery_requires_alias = 0, enable_parallel_hash_join=true; +SETTINGS joined_subquery_requires_alias = 0; SELECT From f461d18de5e9f04bed6fa2b647a5725a44c086e4 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 20 Apr 2022 19:51:35 +0800 Subject: [PATCH 026/106] update method name --- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/TableJoin.cpp | 2 +- src/Interpreters/TableJoin.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index ce1376c41e4..c1773edf740 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -936,7 +936,7 @@ static std::shared_ptr chooseJoinAlgorithm(std::shared_ptr ana bool allow_merge_join = analyzed_join->allowMergeJoin(); if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join)) { - if (analyzed_join->allowConcurrentHashJoin()) + if (analyzed_join->allowParallelHashJoin()) { return std::make_shared(context, analyzed_join, context->getSettings().max_threads, sample_block); } diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index ed882c0fd16..a0e47a429d7 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -748,7 +748,7 @@ void TableJoin::resetToCross() this->table_join.kind = ASTTableJoin::Kind::Cross; } -bool TableJoin::allowConcurrentHashJoin() const +bool TableJoin::allowParallelHashJoin() const { if (dictionary_reader || join_algorithm != JoinAlgorithm::PARALLEL_HASH) return false; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index d990cadbcca..905788e021c 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -191,7 +191,7 @@ public: bool allowMergeJoin() const; bool preferMergeJoin() const { return join_algorithm == JoinAlgorithm::PREFER_PARTIAL_MERGE; } bool forceMergeJoin() const { return join_algorithm == JoinAlgorithm::PARTIAL_MERGE; } - bool allowConcurrentHashJoin() const; + bool allowParallelHashJoin() const; bool forceHashJoin() const { /// HashJoin always used for DictJoin From 30c99d3ef22974950785a7121dcf978d0e37ae5c Mon Sep 17 00:00:00 2001 From: dmitriy Date: Wed, 20 Apr 2022 15:55:51 +0300 Subject: [PATCH 027/106] Fix typo in LowCardinality doc --- docs/ru/sql-reference/data-types/lowcardinality.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/data-types/lowcardinality.md b/docs/ru/sql-reference/data-types/lowcardinality.md index 8e872c3bf53..14a9e923ac8 100644 --- a/docs/ru/sql-reference/data-types/lowcardinality.md +++ b/docs/ru/sql-reference/data-types/lowcardinality.md @@ -21,7 +21,7 @@ LowCardinality(data_type) `LowCardinality` — это надстройка, изменяющая способ хранения и правила обработки данных. ClickHouse применяет [словарное кодирование](https://en.wikipedia.org/wiki/Dictionary_coder) в столбцы типа `LowCardinality`. Работа с данными, представленными в словарном виде, может значительно увеличивать производительность запросов [SELECT](../statements/select/index.md) для многих приложений. -Эффективность использования типа данных `LowCarditality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных. +Эффективность использования типа данных `LowCardinality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных. При работе со строками использование `LowCardinality` вместо [Enum](enum.md) обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность. From bfd1a0e33a1cc17fb6c0acdd63b9c385e54de705 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 21 Apr 2022 12:14:36 +0800 Subject: [PATCH 028/106] update codes --- src/Interpreters/ConcurrentHashJoin.cpp | 59 +++++++++---------------- src/Interpreters/ConcurrentHashJoin.h | 23 ++++------ src/Interpreters/ExpressionAnalyzer.cpp | 20 +++++---- src/Interpreters/TableJoin.cpp | 3 +- 4 files changed, 43 insertions(+), 62 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 3c72c5963bc..82846b99cbb 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -19,6 +19,7 @@ #include #include #include +#include "base/logger_useful.h" namespace DB { namespace ErrorCodes @@ -29,7 +30,7 @@ namespace ErrorCodes } namespace JoinStuff { -ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_) +ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & left_sample_block, const Block & right_sample_block, bool any_take_last_row_) : context(context_) , table_join(table_join_) , slots(slots_) @@ -41,17 +42,23 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr(); + auto inner_hash_join = std::make_shared(); inner_hash_join->data = std::make_unique(table_join_, right_sample_block, any_take_last_row_); hash_joins.emplace_back(std::move(inner_hash_join)); } - dispatch_datas.emplace_back(std::make_shared()); - dispatch_datas.emplace_back(std::make_shared()); + + dispatch_datas = {std::make_shared(), std::make_shared()}; + const auto & onexpr = table_join->getClauses()[0]; + auto & left_dispatch_data = *dispatch_datas[0]; + std::tie(left_dispatch_data.hash_expression_actions, left_dispatch_data.hash_columns_names) = buildHashExpressionAction(left_sample_block, onexpr.key_names_left); + + auto & right_dispatch_data = *dispatch_datas[1]; + std::tie(right_dispatch_data.hash_expression_actions, right_dispatch_data.hash_columns_names) = buildHashExpressionAction(right_sample_block, onexpr.key_names_right); } bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) { - auto & dispatch_data = getBlockDispatchControlData(block, RIGHT); + auto & dispatch_data = *dispatch_datas[1]; std::vector dispatched_blocks; Block cloned_block = block; dispatchBlock(dispatch_data, cloned_block, dispatched_blocks); @@ -63,7 +70,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) check_total_rows += rows; check_total_bytes += dispatched_block.bytes(); std::unique_lock lock(hash_join->mutex); - + // Don't take the real insertion here, because inserting a block into HashTable is a time-consuming operation, // it may cause serious lock contention and make the whole process slow. hash_join->pending_right_blocks.emplace_back(std::move(dispatched_block)); @@ -77,7 +84,6 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr & not_processed) { - if (block.rows()) waitAllAddJoinedBlocksFinished(); else @@ -87,7 +93,7 @@ void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr & return; } - auto & dispatch_data = getBlockDispatchControlData(block, LEFT); + auto & dispatch_data = *dispatch_datas[0]; std::vector dispatched_blocks; Block cloned_block = block; dispatchBlock(dispatch_data, cloned_block, dispatched_blocks); @@ -191,8 +197,9 @@ std::shared_ptr ConcurrentHashJoin::getNonJoinedBlocks( throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", table_join->kind(), table_join->strictness()); } -std::shared_ptr ConcurrentHashJoin::buildHashExpressionAction(const Block & block, const Strings & based_columns_names, Strings & hash_columns_names) +std::pair, Strings> ConcurrentHashJoin::buildHashExpressionAction(const Block & block, const Strings & based_columns_names) { + Strings hash_columns_names; WriteBufferFromOwnString col_buf; for (size_t i = 0, sz = based_columns_names.size(); i < sz; ++i) { @@ -229,37 +236,13 @@ std::shared_ptr ConcurrentHashJoin::buildHashExpressionAction true, false, true, false); ActionsVisitor(visitor_data, visit_log.stream()).visit(func_ast); actions = visitor_data.getActions(); - return std::make_shared(actions); -} - -ConcurrentHashJoin::BlockDispatchControlData & ConcurrentHashJoin::getBlockDispatchControlData(const Block & block, TableIndex table_index) -{ - auto & data = *dispatch_datas[table_index]; - if (data.has_init)[[likely]] - return data; - std::lock_guard lock(data.mutex); - if (data.has_init) - return data; - - if (table_join->getClauses().empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "empty join clauses"); - const auto & onexpr = table_join->getClauses()[0]; - if (table_index == LEFT) - { - data.hash_expression_actions = buildHashExpressionAction(block, onexpr.key_names_left, data.hash_columns_names); - } - else - { - data.hash_expression_actions = buildHashExpressionAction(block, onexpr.key_names_right, data.hash_columns_names); - } - data.header = block.cloneEmpty(); - data.has_init = true; - return data; + return {std::make_shared(actions), hash_columns_names}; } void ConcurrentHashJoin::dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, std::vector & dispatched_blocks) { auto rows_before_filtration = from_block.rows(); + auto header = from_block.cloneEmpty(); dispatch_data.hash_expression_actions->execute(from_block, rows_before_filtration); for (const auto & filter_column_name : dispatch_data.hash_columns_names) { @@ -267,7 +250,7 @@ void ConcurrentHashJoin::dispatchBlock(BlockDispatchControlData & dispatch_data, auto filter_desc = std::make_unique(*full_column); auto num_filtered_rows = filter_desc->countBytesInFilter(); ColumnsWithTypeAndName filtered_block_columns; - for (size_t i = 0; i < dispatch_data.header.columns(); ++i) + for (size_t i = 0; i < header.columns(); ++i) { auto & from_column = from_block.getByPosition(i); auto filtered_column = filter_desc->filter(*from_column.column, num_filtered_rows); @@ -281,7 +264,7 @@ void ConcurrentHashJoin::waitAllAddJoinedBlocksFinished() { while (finished_add_joined_blocks_tasks < hash_joins.size())[[unlikely]] { - std::shared_ptr hash_join; + std::shared_ptr hash_join; { std::unique_lock lock(finished_add_joined_blocks_tasks_mutex); hash_join = getUnfinishedAddJoinedBlockTasks(); @@ -306,7 +289,7 @@ void ConcurrentHashJoin::waitAllAddJoinedBlocksFinished() } } -std::shared_ptr ConcurrentHashJoin::getUnfinishedAddJoinedBlockTasks() +std::shared_ptr ConcurrentHashJoin::getUnfinishedAddJoinedBlockTasks() { for (auto & hash_join : hash_joins) { diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index 35ac29c213f..e8ec4e4b48a 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -34,7 +34,7 @@ namespace JoinStuff class ConcurrentHashJoin : public IJoin { public: - explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_ = false); + explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & left_sample_block, const Block & right_sample_block, bool any_take_last_row_ = false); ~ConcurrentHashJoin() override = default; const TableJoin & getTableJoin() const override { return *table_join; } @@ -50,7 +50,7 @@ public: std::shared_ptr getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override; private: - struct InnerHashJoin + struct InternalHashJoin { std::mutex mutex; std::unique_ptr data; @@ -60,7 +60,7 @@ private: ContextPtr context; std::shared_ptr table_join; size_t slots; - std::vector> hash_joins; + std::vector> hash_joins; std::atomic check_total_rows; std::atomic check_total_bytes; @@ -71,19 +71,13 @@ private: mutable std::mutex totals_mutex; Block totals; - enum TableIndex - { - LEFT = 0, - RIGHT = 1 - }; - struct BlockDispatchControlData { - std::mutex mutex; - std::atomic has_init = false; + //std::mutex mutex; + //std::atomic has_init = false; std::shared_ptr hash_expression_actions; Strings hash_columns_names; - Block header; + //Block header; BlockDispatchControlData() = default; }; @@ -91,13 +85,12 @@ private: Poco::Logger * logger = &Poco::Logger::get("ConcurrentHashJoin"); - std::shared_ptr buildHashExpressionAction(const Block & block, const Strings & based_columns_names, Strings & hash_columns_names); - BlockDispatchControlData & getBlockDispatchControlData(const Block & block, TableIndex table_index); + std::pair, Strings> buildHashExpressionAction(const Block & block, const Strings & based_columns_names); static void dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, std::vector & dispatched_blocks); void waitAllAddJoinedBlocksFinished(); - std::shared_ptr getUnfinishedAddJoinedBlockTasks(); + std::shared_ptr getUnfinishedAddJoinedBlockTasks(); }; } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index c1773edf740..3da5c385241 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -60,6 +60,9 @@ #include #include +#include +#include + namespace DB { @@ -927,24 +930,24 @@ static ActionsDAGPtr createJoinedBlockActions(ContextPtr context, const TableJoi return ExpressionAnalyzer(expression_list, syntax_result, context).getActionsDAG(true, false); } -static std::shared_ptr chooseJoinAlgorithm(std::shared_ptr analyzed_join, const Block & sample_block, ContextPtr context) +static std::shared_ptr chooseJoinAlgorithm(std::shared_ptr analyzed_join, const Block left_sample_block, const Block & right_sample_block, ContextPtr context) { /// HashJoin with Dictionary optimisation - if (analyzed_join->tryInitDictJoin(sample_block, context)) - return std::make_shared(analyzed_join, sample_block); + if (analyzed_join->tryInitDictJoin(right_sample_block, context)) + return std::make_shared(analyzed_join, right_sample_block); bool allow_merge_join = analyzed_join->allowMergeJoin(); if (analyzed_join->forceHashJoin() || (analyzed_join->preferMergeJoin() && !allow_merge_join)) { if (analyzed_join->allowParallelHashJoin()) { - return std::make_shared(context, analyzed_join, context->getSettings().max_threads, sample_block); + return std::make_shared(context, analyzed_join, context->getSettings().max_threads, left_sample_block, right_sample_block); } - return std::make_shared(analyzed_join, sample_block); + return std::make_shared(analyzed_join, right_sample_block); } else if (analyzed_join->forceMergeJoin() || (analyzed_join->preferMergeJoin() && allow_merge_join)) - return std::make_shared(analyzed_join, sample_block); - return std::make_shared(analyzed_join, sample_block); + return std::make_shared(analyzed_join, right_sample_block); + return std::make_shared(analyzed_join, right_sample_block); } static std::unique_ptr buildJoinedPlan( @@ -1032,7 +1035,8 @@ JoinPtr SelectQueryExpressionAnalyzer::makeTableJoin( joined_plan->addStep(std::move(converting_step)); } - JoinPtr join = chooseJoinAlgorithm(analyzed_join, joined_plan->getCurrentDataStream().header, getContext()); + Block left_sample_block(left_columns); + JoinPtr join = chooseJoinAlgorithm(analyzed_join, left_sample_block, joined_plan->getCurrentDataStream().header, getContext()); /// Do not make subquery for join over dictionary. if (analyzed_join->getDictionaryReader()) diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index a0e47a429d7..e70db4a2928 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -754,7 +754,8 @@ bool TableJoin::allowParallelHashJoin() const return false; if (table_join.kind != ASTTableJoin::Kind::Left && table_join.kind != ASTTableJoin::Kind::Inner) return false; - + if (isSpecialStorage() || !oneDisjunct()) + return false; return true; } From 60964b207d466c931812b96ed57ed0651678ee10 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 21 Apr 2022 13:19:33 +0800 Subject: [PATCH 029/106] update addJoinedBlock() --- src/Interpreters/ConcurrentHashJoin.cpp | 91 +++++++------------------ src/Interpreters/ConcurrentHashJoin.h | 11 +-- 2 files changed, 26 insertions(+), 76 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 82846b99cbb..8dd7e461044 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -62,37 +62,38 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) std::vector dispatched_blocks; Block cloned_block = block; dispatchBlock(dispatch_data, cloned_block, dispatched_blocks); - for (size_t i = 0; i < dispatched_blocks.size(); ++i) - { - auto & hash_join = hash_joins[i]; - auto & dispatched_block = dispatched_blocks[i]; - auto rows = dispatched_block.rows(); - check_total_rows += rows; - check_total_bytes += dispatched_block.bytes(); - std::unique_lock lock(hash_join->mutex); - // Don't take the real insertion here, because inserting a block into HashTable is a time-consuming operation, - // it may cause serious lock contention and make the whole process slow. - hash_join->pending_right_blocks.emplace_back(std::move(dispatched_block)); + std::list pending_blocks; + for (size_t i = 0; i < dispatched_blocks.size(); ++i) + pending_blocks.emplace_back(i); + while (!pending_blocks.empty()) + { + for (auto iter = pending_blocks.begin(); iter != pending_blocks.end();) + { + auto & i = *iter; + auto & hash_join = hash_joins[i]; + auto & dispatched_block = dispatched_blocks[i]; + if (hash_join->mutex.try_lock()) + { + hash_join->data->addJoinedBlock(dispatched_block, check_limits); + + hash_join->mutex.unlock(); + iter = pending_blocks.erase(iter); + } + else + { + iter++; + } + } } if (check_limits) - return table_join->sizeLimits().check( - check_total_rows.load(), check_total_bytes.load(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); + return table_join->sizeLimits().check(getTotalRowCount(), getTotalByteCount(), "JOIN", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED); return true; } -void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr & not_processed) +void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr & /*not_processed*/) { - if (block.rows()) - waitAllAddJoinedBlocksFinished(); - else - { - std::unique_lock lock(hash_joins[0]->mutex); - hash_joins[0]->data->joinBlock(block, not_processed); - return; - } - auto & dispatch_data = *dispatch_datas[0]; std::vector dispatched_blocks; Block cloned_block = block; @@ -179,7 +180,7 @@ bool ConcurrentHashJoin::alwaysReturnsEmptySet() const { for (const auto & hash_join : hash_joins) { - if (!hash_join->data->alwaysReturnsEmptySet() || !hash_join->pending_right_blocks.empty()) + if (!hash_join->data->alwaysReturnsEmptySet()) return false; } return true; @@ -260,47 +261,5 @@ void ConcurrentHashJoin::dispatchBlock(BlockDispatchControlData & dispatch_data, } } -void ConcurrentHashJoin::waitAllAddJoinedBlocksFinished() -{ - while (finished_add_joined_blocks_tasks < hash_joins.size())[[unlikely]] - { - std::shared_ptr hash_join; - { - std::unique_lock lock(finished_add_joined_blocks_tasks_mutex); - hash_join = getUnfinishedAddJoinedBlockTasks(); - if (!hash_join) - { - while (finished_add_joined_blocks_tasks < hash_joins.size()) - { - finished_add_joined_blocks_tasks_cond.wait(lock); - } - return; - } - } - - while (!hash_join->pending_right_blocks.empty()) - { - Block & block = hash_join->pending_right_blocks.front(); - hash_join->data->addJoinedBlock(block, true); - hash_join->pending_right_blocks.pop_front(); - } - finished_add_joined_blocks_tasks += 1; - finished_add_joined_blocks_tasks_cond.notify_all(); - } -} - -std::shared_ptr ConcurrentHashJoin::getUnfinishedAddJoinedBlockTasks() -{ - for (auto & hash_join : hash_joins) - { - if (!hash_join->in_inserting) - { - hash_join->in_inserting = true; - return hash_join; - } - } - return nullptr; -} - } } diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index e8ec4e4b48a..c87604e18e1 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -54,15 +54,12 @@ private: { std::mutex mutex; std::unique_ptr data; - std::list pending_right_blocks; - bool in_inserting = false; }; + ContextPtr context; std::shared_ptr table_join; size_t slots; std::vector> hash_joins; - std::atomic check_total_rows; - std::atomic check_total_bytes; std::mutex finished_add_joined_blocks_tasks_mutex; std::condition_variable finished_add_joined_blocks_tasks_cond; @@ -73,11 +70,8 @@ private: struct BlockDispatchControlData { - //std::mutex mutex; - //std::atomic has_init = false; std::shared_ptr hash_expression_actions; Strings hash_columns_names; - //Block header; BlockDispatchControlData() = default; }; @@ -89,9 +83,6 @@ private: static void dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, std::vector & dispatched_blocks); - void waitAllAddJoinedBlocksFinished(); - std::shared_ptr getUnfinishedAddJoinedBlockTasks(); - }; } } From eac810ed85ff471dc6b029479dddbd75d1c1ad02 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 21 Apr 2022 14:10:09 +0800 Subject: [PATCH 030/106] use concatenateBlocks --- src/Interpreters/ConcurrentHashJoin.cpp | 28 +------------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 8dd7e461044..a102640ba22 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -108,33 +108,7 @@ void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr & throw Exception(ErrorCodes::LOGICAL_ERROR, "not_processed should be empty"); } - ColumnsWithTypeAndName final_columns; - MutableColumns mutable_final_columns; - NamesAndTypesList names_and_types = dispatched_blocks[0].getNamesAndTypesList(); - auto types = names_and_types.getTypes(); - for (auto & dispatched_block : dispatched_blocks) - { - for (size_t pos = 0; pos < dispatched_block.columns(); ++pos) - { - auto & from_column = dispatched_block.getByPosition(pos); - if (mutable_final_columns.size() <= pos) - { - mutable_final_columns.emplace_back(from_column.column->cloneEmpty()); - } - if (!from_column.column->empty()) - { - mutable_final_columns[pos]->insertRangeFrom(*from_column.column, 0, from_column.column->size()); - } - } - } - - size_t i = 0; - for (auto & name_and_type : names_and_types) - { - final_columns.emplace_back(ColumnWithTypeAndName(std::move(mutable_final_columns[i]), name_and_type.type, name_and_type.name)); - i += 1; - } - block = Block(final_columns); + block = concatenateBlocks(dispatched_blocks); } void ConcurrentHashJoin::checkTypesOfKeys(const Block & block) const From e27d67ce8ab11f7f14863e526fd5af7899dad3c0 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 21 Apr 2022 14:28:22 +0800 Subject: [PATCH 031/106] update type name --- src/Interpreters/ConcurrentHashJoin.cpp | 6 +++--- src/Interpreters/ConcurrentHashJoin.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index a102640ba22..a8187b457cf 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -59,7 +59,7 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr dispatched_blocks; + Blocks dispatched_blocks; Block cloned_block = block; dispatchBlock(dispatch_data, cloned_block, dispatched_blocks); @@ -95,7 +95,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) void ConcurrentHashJoin::joinBlock(Block & block, std::shared_ptr & /*not_processed*/) { auto & dispatch_data = *dispatch_datas[0]; - std::vector dispatched_blocks; + Blocks dispatched_blocks; Block cloned_block = block; dispatchBlock(dispatch_data, cloned_block, dispatched_blocks); for (size_t i = 0; i < dispatched_blocks.size(); ++i) @@ -214,7 +214,7 @@ std::pair, Strings> ConcurrentHashJoin::build return {std::make_shared(actions), hash_columns_names}; } -void ConcurrentHashJoin::dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, std::vector & dispatched_blocks) +void ConcurrentHashJoin::dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, Blocks & dispatched_blocks) { auto rows_before_filtration = from_block.rows(); auto header = from_block.cloneEmpty(); diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index c87604e18e1..066fe7fefdb 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -81,7 +81,7 @@ private: std::pair, Strings> buildHashExpressionAction(const Block & block, const Strings & based_columns_names); - static void dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, std::vector & dispatched_blocks); + static void dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, Blocks & dispatched_blocks); }; } From a9d9abb7a172e20aded3155cf528878e76846b3b Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 21 Apr 2022 16:59:30 +0800 Subject: [PATCH 032/106] use column::scatter() --- src/Interpreters/ConcurrentHashJoin.cpp | 83 ++++++++++++++++--------- src/Interpreters/ConcurrentHashJoin.h | 6 +- 2 files changed, 57 insertions(+), 32 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index a8187b457cf..ab4ea242882 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -14,12 +14,14 @@ #include #include #include +#include #include #include #include #include +#include #include -#include "base/logger_useful.h" +#include namespace DB { namespace ErrorCodes @@ -35,7 +37,7 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr= 256) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid argument slot : {}", slots_); } @@ -50,10 +52,10 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr(), std::make_shared()}; const auto & onexpr = table_join->getClauses()[0]; auto & left_dispatch_data = *dispatch_datas[0]; - std::tie(left_dispatch_data.hash_expression_actions, left_dispatch_data.hash_columns_names) = buildHashExpressionAction(left_sample_block, onexpr.key_names_left); + std::tie(left_dispatch_data.hash_expression_actions, left_dispatch_data.hash_column_name) = buildHashExpressionAction(left_sample_block, onexpr.key_names_left); auto & right_dispatch_data = *dispatch_datas[1]; - std::tie(right_dispatch_data.hash_expression_actions, right_dispatch_data.hash_columns_names) = buildHashExpressionAction(right_sample_block, onexpr.key_names_right); + std::tie(right_dispatch_data.hash_expression_actions, right_dispatch_data.hash_column_name) = buildHashExpressionAction(right_sample_block, onexpr.key_names_right); } bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) @@ -80,7 +82,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) hash_join->mutex.unlock(); iter = pending_blocks.erase(iter); } - else + else { iter++; } @@ -172,7 +174,7 @@ std::shared_ptr ConcurrentHashJoin::getNonJoinedBlocks( throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", table_join->kind(), table_join->strictness()); } -std::pair, Strings> ConcurrentHashJoin::buildHashExpressionAction(const Block & block, const Strings & based_columns_names) +std::pair, String> ConcurrentHashJoin::buildHashExpressionAction(const Block & block, const Strings & based_columns_names) { Strings hash_columns_names; WriteBufferFromOwnString col_buf; @@ -183,17 +185,12 @@ std::pair, Strings> ConcurrentHashJoin::build col_buf << based_columns_names[i]; } WriteBufferFromOwnString write_buf; - for (size_t i = 0; i < slots; ++i) - { - if (i) - write_buf << ","; - write_buf << "cityHash64(" << col_buf.str() << ")%" << slots << "=" << i; - } + write_buf << "cityHash64(" << col_buf.str() << ") % " << slots; + auto settings = context->getSettings(); ParserExpressionList hash_expr_parser(true); ASTPtr func_ast = parseQuery(hash_expr_parser, write_buf.str(), "Parse Block hash expression", settings.max_query_size, settings.max_parser_depth); - for (auto & child : func_ast->children) - hash_columns_names.emplace_back(child->getColumnName()); + auto hash_column_name = func_ast->children[0]->getColumnName(); DebugASTLog visit_log; const auto & names_and_types = block.getNamesAndTypesList(); @@ -211,28 +208,56 @@ std::pair, Strings> ConcurrentHashJoin::build true, false, true, false); ActionsVisitor(visitor_data, visit_log.stream()).visit(func_ast); actions = visitor_data.getActions(); - return {std::make_shared(actions), hash_columns_names}; + return {std::make_shared(actions), hash_column_name}; } void ConcurrentHashJoin::dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, Blocks & dispatched_blocks) { - auto rows_before_filtration = from_block.rows(); auto header = from_block.cloneEmpty(); - dispatch_data.hash_expression_actions->execute(from_block, rows_before_filtration); - for (const auto & filter_column_name : dispatch_data.hash_columns_names) + auto num_shards = hash_joins.size(); + Block block_for_build_selector = from_block; + dispatch_data.hash_expression_actions->execute(block_for_build_selector); + auto selector_column = block_for_build_selector.getByName(dispatch_data.hash_column_name); + std::vector selector_slots; + for (UInt64 i = 0; i < num_shards; ++i) { - auto full_column = from_block.findByName(filter_column_name)->column->convertToFullColumnIfConst(); - auto filter_desc = std::make_unique(*full_column); - auto num_filtered_rows = filter_desc->countBytesInFilter(); - ColumnsWithTypeAndName filtered_block_columns; - for (size_t i = 0; i < header.columns(); ++i) - { - auto & from_column = from_block.getByPosition(i); - auto filtered_column = filter_desc->filter(*from_column.column, num_filtered_rows); - filtered_block_columns.emplace_back(filtered_column, from_column.type, from_column.name); - } - dispatched_blocks.emplace_back(std::move(filtered_block_columns)); + selector_slots.emplace_back(i); + dispatched_blocks.emplace_back(from_block.cloneEmpty()); } + if (selector_column.column->isNullable()) + { + const auto * nullable_col = typeid_cast(selector_column.column.get()); + const auto & nested_col = nullable_col->getNestedColumnPtr(); + size_t last_offset = 0; + MutableColumnPtr dst = nullable_col->cloneEmpty(); + for (size_t i = 0, sz = selector_column.column->size(); i < sz; ++i) + { + if (selector_column.column->isNullAt(i))[[unlikely]] + { + if (i > last_offset)[[likely]] + dst->insertRangeFrom(*nested_col, last_offset, i - last_offset); + dst->insertDefault(); + last_offset = i + 1; + } + } + if (last_offset < selector_column.column->size()) + { + dst->insertRangeFrom(*nested_col, last_offset, selector_column.column->size() - last_offset); + } + selector_column.column = std::move(dst); + } + auto selector = createBlockSelector(*selector_column.column, selector_slots); + + auto columns_in_block = header.columns(); + for (size_t i = 0; i < columns_in_block; ++i) + { + auto dispatched_columns = from_block.getByPosition(i).column->scatter(num_shards, selector); + for (size_t block_index = 0; block_index < num_shards; ++block_index) + { + dispatched_blocks[block_index].getByPosition(i).column = std::move(dispatched_columns[block_index]); + } + } + } } diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index 066fe7fefdb..e1f73d38a75 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -71,7 +71,7 @@ private: struct BlockDispatchControlData { std::shared_ptr hash_expression_actions; - Strings hash_columns_names; + String hash_column_name; BlockDispatchControlData() = default; }; @@ -79,9 +79,9 @@ private: Poco::Logger * logger = &Poco::Logger::get("ConcurrentHashJoin"); - std::pair, Strings> buildHashExpressionAction(const Block & block, const Strings & based_columns_names); + std::pair, String> buildHashExpressionAction(const Block & block, const Strings & based_columns_names); - static void dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, Blocks & dispatched_blocks); + void dispatchBlock(BlockDispatchControlData & dispatch_data, Block & from_block, Blocks & dispatched_blocks); }; } From fbd549c6acefbf94125ba692042b9a9414f955ff Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 21 Apr 2022 18:07:44 +0800 Subject: [PATCH 033/106] update test scripts --- .../queries/1_stateful/00172_parallel_join.reference | 12 ++++++------ tests/queries/1_stateful/00172_parallel_join.sql | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/1_stateful/00172_parallel_join.reference b/tests/queries/1_stateful/00172_parallel_join.reference index 9ba73024fa2..039f6ca9967 100644 --- a/tests/queries/1_stateful/00172_parallel_join.reference +++ b/tests/queries/1_stateful/00172_parallel_join.reference @@ -282,18 +282,18 @@ hurpass.com 49671 1251 1391426411567902 Поиск ['Поиск'] 1391426411567902 Поиск ['Поиск'] 1391426411567902 Эксперимент /f/1/global/banners_v1_1.js ['Эксперимент'] -4 4 2 0 0 0 -8 8 4 2 2 1 +4 4 2 6 6 3 -1 1 2 -1 1 3 -4 4 8 -4 4 9 +8 8 4 0 0 0 0 0 1 +1 1 2 +1 1 3 2 2 4 2 2 5 3 3 6 3 3 7 +4 4 8 +4 4 9 diff --git a/tests/queries/1_stateful/00172_parallel_join.sql b/tests/queries/1_stateful/00172_parallel_join.sql index b021b040dc2..fce41d7a761 100644 --- a/tests/queries/1_stateful/00172_parallel_join.sql +++ b/tests/queries/1_stateful/00172_parallel_join.sql @@ -190,7 +190,7 @@ ANY INNER JOIN ( SELECT number * 2 AS k, number AS joined FROM system.numbers LIMIT 10 ) AS b -USING k +USING k ORDER BY joined SETTINGS any_join_distinct_right_table_keys = 1; SELECT a.*, b.* FROM @@ -201,4 +201,4 @@ ALL INNER JOIN ( SELECT intDiv(number, 2) AS k, number AS joined FROM system.numbers LIMIT 10 ) AS b -USING k; +USING k ORDER BY joined; From 32c8a0cef88c5127a513d8e9ee1004d77a836c4c Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 22 Apr 2022 09:52:02 +0800 Subject: [PATCH 034/106] update test case 01049_join_low_card_bug_long --- .../01049_join_low_card_bug_long.reference | 64 +++++++-------- .../01049_join_low_card_bug_long.sql.j2 | 82 ++++++++++--------- 2 files changed, 74 insertions(+), 72 deletions(-) diff --git a/tests/queries/0_stateless/01049_join_low_card_bug_long.reference b/tests/queries/0_stateless/01049_join_low_card_bug_long.reference index 6d9f36df075..c5d02a66ba5 100644 --- a/tests/queries/0_stateless/01049_join_low_card_bug_long.reference +++ b/tests/queries/0_stateless/01049_join_low_card_bug_long.reference @@ -1,83 +1,83 @@ -- join_algorithm = default, join_use_nulls = 0 -- str LowCardinality(String) LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str LowCardinality(String) LowCardinality(String) str_l LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str_l LowCardinality(String) LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) -LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str_l str_l -LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str_l str_l str LowCardinality(String) LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str LowCardinality(String) LowCardinality(String) str_l LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str_l LowCardinality(String) String String str str LowCardinality(String) LowCardinality(String) str str String String str_r str_r LowCardinality(String) LowCardinality(String) -String String str_r str_r LowCardinality(String) LowCardinality(String) String String str str LowCardinality(String) LowCardinality(String) str str +String String str_r str_r LowCardinality(String) LowCardinality(String) String String str str LowCardinality(String) LowCardinality(String) str str String String str_r str_r LowCardinality(String) LowCardinality(String) String String LowCardinality(String) LowCardinality(String) str_l str_l -String String str_r str_r LowCardinality(String) LowCardinality(String) String String str str LowCardinality(String) LowCardinality(String) str str +String String str_r str_r LowCardinality(String) LowCardinality(String) String String LowCardinality(String) LowCardinality(String) str_l str_l str String String -str_r String str String +str_r String str String String str_l String -str_r String str String +str_r String str_l String LowCardinality(String) LowCardinality(String) str str String String str str LowCardinality(String) LowCardinality(String) str_r str_r String String -LowCardinality(String) LowCardinality(String) str_r str_r String String LowCardinality(String) LowCardinality(String) str str String String str str +LowCardinality(String) LowCardinality(String) str_r str_r String String LowCardinality(String) LowCardinality(String) str str String String str str LowCardinality(String) LowCardinality(String) str_r str_r String String LowCardinality(String) LowCardinality(String) String String str_l str_l -LowCardinality(String) LowCardinality(String) str_r str_r String String LowCardinality(String) LowCardinality(String) str str String String str str +LowCardinality(String) LowCardinality(String) str_r str_r String String LowCardinality(String) LowCardinality(String) String String str_l str_l str LowCardinality(String) LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str LowCardinality(String) LowCardinality(String) str_l LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str_l LowCardinality(String) Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) -Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) \N \N LowCardinality(String) LowCardinality(String) str_l str_l -Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) \N \N LowCardinality(String) LowCardinality(String) str_l str_l str Nullable(String) \N Nullable(String) @@ -203,83 +203,83 @@ LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_r str_r Nu -- join_algorithm = partial_merge, join_use_nulls = 0 -- str LowCardinality(String) LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str LowCardinality(String) LowCardinality(String) str_l LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str_l LowCardinality(String) LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) -LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str_l str_l -LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str_l str_l str LowCardinality(String) LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str LowCardinality(String) LowCardinality(String) str_l LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str_l LowCardinality(String) String String str str LowCardinality(String) LowCardinality(String) str str String String str_r str_r LowCardinality(String) LowCardinality(String) -String String str_r str_r LowCardinality(String) LowCardinality(String) String String str str LowCardinality(String) LowCardinality(String) str str +String String str_r str_r LowCardinality(String) LowCardinality(String) String String str str LowCardinality(String) LowCardinality(String) str str String String str_r str_r LowCardinality(String) LowCardinality(String) String String LowCardinality(String) LowCardinality(String) str_l str_l -String String str_r str_r LowCardinality(String) LowCardinality(String) String String str str LowCardinality(String) LowCardinality(String) str str +String String str_r str_r LowCardinality(String) LowCardinality(String) String String LowCardinality(String) LowCardinality(String) str_l str_l str String String -str_r String str String +str_r String str String String str_l String -str_r String str String +str_r String str_l String LowCardinality(String) LowCardinality(String) str str String String str str LowCardinality(String) LowCardinality(String) str_r str_r String String -LowCardinality(String) LowCardinality(String) str_r str_r String String LowCardinality(String) LowCardinality(String) str str String String str str +LowCardinality(String) LowCardinality(String) str_r str_r String String LowCardinality(String) LowCardinality(String) str str String String str str LowCardinality(String) LowCardinality(String) str_r str_r String String LowCardinality(String) LowCardinality(String) String String str_l str_l -LowCardinality(String) LowCardinality(String) str_r str_r String String LowCardinality(String) LowCardinality(String) str str String String str str +LowCardinality(String) LowCardinality(String) str_r str_r String String LowCardinality(String) LowCardinality(String) String String str_l str_l str LowCardinality(String) LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str LowCardinality(String) LowCardinality(String) str_l LowCardinality(String) -str_r LowCardinality(String) str LowCardinality(String) +str_r LowCardinality(String) str_l LowCardinality(String) Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) -Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) \N \N LowCardinality(String) LowCardinality(String) str_l str_l -Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) \N \N LowCardinality(String) LowCardinality(String) str_l str_l str Nullable(String) \N Nullable(String) diff --git a/tests/queries/0_stateless/01049_join_low_card_bug_long.sql.j2 b/tests/queries/0_stateless/01049_join_low_card_bug_long.sql.j2 index b19addfb6be..6328c16085b 100644 --- a/tests/queries/0_stateless/01049_join_low_card_bug_long.sql.j2 +++ b/tests/queries/0_stateless/01049_join_low_card_bug_long.sql.j2 @@ -30,63 +30,64 @@ SELECT '-- join_algorithm = {{ join_algorithm or 'default' }}, join_use_nulls = {% if join_algorithm %}SET join_algorithm = '{{ join_algorithm }}';{% endif -%} SET join_use_nulls = {{ join_use_nulls }}; -SELECT lc, toTypeName(lc) FROM l_lc AS l RIGHT JOIN r_lc AS r USING (x) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l_lc AS l RIGHT JOIN r_lc AS r USING (lc) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l_lc AS l FULL JOIN r_lc AS r USING (x) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l_lc AS l FULL JOIN r_lc AS r USING (lc) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN r_lc AS r USING (x) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN r_lc AS r USING (lc) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN r_lc AS r USING (x) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN r_lc AS r USING (lc) ORDER BY x; +SELECT lc, toTypeName(lc) FROM l_lc AS l RIGHT JOIN r_lc AS r USING (x) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l_lc AS l RIGHT JOIN r_lc AS r USING (lc) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l_lc AS l FULL JOIN r_lc AS r USING (x) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l_lc AS l FULL JOIN r_lc AS r USING (lc) ORDER BY x, lc; + +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN r_lc AS r USING (x) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN r_lc AS r USING (lc) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN r_lc AS r USING (x) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN r_lc AS r USING (lc) ORDER BY x, r.lc, l.lc; -- -SELECT lc, toTypeName(lc) FROM l_lc AS l RIGHT JOIN r USING (x) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l_lc AS l RIGHT JOIN r USING (lc) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l_lc AS l FULL JOIN r USING (x) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l_lc AS l FULL JOIN r USING (lc) ORDER BY x; +SELECT lc, toTypeName(lc) FROM l_lc AS l RIGHT JOIN r USING (x) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l_lc AS l RIGHT JOIN r USING (lc) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l_lc AS l FULL JOIN r USING (x) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l_lc AS l FULL JOIN r USING (lc) ORDER BY x, lc; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN r USING (x) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN r USING (lc) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN r USING (x) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN r USING (lc) ORDER BY x; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN r USING (x) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN r USING (lc) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN r USING (x) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN r USING (lc) ORDER BY x, r.lc, l.lc; -- -SELECT lc, toTypeName(lc) FROM l RIGHT JOIN r USING (x) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l RIGHT JOIN r USING (lc) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l FULL JOIN r USING (x) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l FULL JOIN r USING (lc) ORDER BY x; +SELECT lc, toTypeName(lc) FROM l RIGHT JOIN r USING (x) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l RIGHT JOIN r USING (lc) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l FULL JOIN r USING (x) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l FULL JOIN r USING (lc) ORDER BY x, lc; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l RIGHT JOIN r_lc AS r USING (x) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l RIGHT JOIN r_lc AS r USING (lc) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l FULL JOIN r_lc AS r USING (x) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l FULL JOIN r_lc AS r USING (lc) ORDER BY x; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l RIGHT JOIN r_lc AS r USING (x) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l RIGHT JOIN r_lc AS r USING (lc) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l FULL JOIN r_lc AS r USING (x) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l FULL JOIN r_lc AS r USING (lc) ORDER BY x, r.lc, l.lc; -- -SELECT lc, toTypeName(lc) FROM l_lc RIGHT JOIN nr USING (x) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l_lc RIGHT JOIN nr USING (lc) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l_lc FULL JOIN nr USING (x) ORDER BY x; -SELECT lc, toTypeName(lc) FROM l_lc FULL JOIN nr USING (lc) ORDER BY x; +SELECT lc, toTypeName(lc) FROM l_lc RIGHT JOIN nr USING (x) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l_lc RIGHT JOIN nr USING (lc) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l_lc FULL JOIN nr USING (x) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM l_lc FULL JOIN nr USING (lc) ORDER BY x, lc; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN nr AS r USING (x) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN nr AS r USING (x) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN nr AS r USING (x) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN nr AS r USING (x) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM l_lc AS l FULL JOIN nr AS r USING (lc) ORDER BY x, r.lc, l.lc; -- -SELECT lc, toTypeName(lc) FROM nl RIGHT JOIN r_lc USING (x) ORDER BY x; -SELECT lc, toTypeName(lc) FROM nl RIGHT JOIN r_lc USING (lc) ORDER BY x; -SELECT lc, toTypeName(lc) FROM nl FULL JOIN r_lc USING (x) ORDER BY x; -SELECT lc, toTypeName(lc) FROM nl FULL JOIN r_lc USING (lc) ORDER BY x; +SELECT lc, toTypeName(lc) FROM nl RIGHT JOIN r_lc USING (x) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM nl RIGHT JOIN r_lc USING (lc) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM nl FULL JOIN r_lc USING (x) ORDER BY x, lc; +SELECT lc, toTypeName(lc) FROM nl FULL JOIN r_lc USING (lc) ORDER BY x, lc; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM nl AS l RIGHT JOIN r_lc AS r USING (x) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM nl AS l RIGHT JOIN r_lc AS r USING (lc) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM nl AS l FULL JOIN r_lc AS r USING (x) ORDER BY x; -SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM nl AS l FULL JOIN r_lc AS r USING (lc) ORDER BY x; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM nl AS l RIGHT JOIN r_lc AS r USING (x) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM nl AS l RIGHT JOIN r_lc AS r USING (lc) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM nl AS l FULL JOIN r_lc AS r USING (x) ORDER BY x, r.lc, l.lc; +SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), toTypeName(l.lc), toTypeName(materialize(l.lc)), l.lc, materialize(l.lc) FROM nl AS l FULL JOIN r_lc AS r USING (lc) ORDER BY x, r.lc, l.lc; {% endfor -%} {% endfor -%} @@ -94,6 +95,7 @@ SELECT toTypeName(r.lc), toTypeName(materialize(r.lc)), r.lc, materialize(r.lc), SELECT '--'; SET join_use_nulls = 0; + SELECT lc, toTypeName(lc) FROM l_lc AS l RIGHT JOIN r_lc AS r USING (x) ORDER BY l.lc; SELECT lowCardinalityKeys(lc.lc) FROM r FULL JOIN l_lc as lc USING (lc) ORDER BY lowCardinalityKeys(lc.lc); From 681d7d679900b6444654a9061ba3ebb3aeadeeb5 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 22 Apr 2022 11:00:00 +0800 Subject: [PATCH 035/106] update dispatchBlock() --- src/Interpreters/ConcurrentHashJoin.cpp | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index ab4ea242882..84feae500b4 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -137,6 +137,7 @@ size_t ConcurrentHashJoin::getTotalRowCount() const size_t res = 0; for (const auto & hash_join : hash_joins) { + std::lock_guard lokc(hash_join->mutex); res += hash_join->data->getTotalRowCount(); } return res; @@ -147,6 +148,7 @@ size_t ConcurrentHashJoin::getTotalByteCount() const size_t res = 0; for (const auto & hash_join : hash_joins) { + std::lock_guard lokc(hash_join->mutex); res += hash_join->data->getTotalByteCount(); } return res; @@ -156,6 +158,7 @@ bool ConcurrentHashJoin::alwaysReturnsEmptySet() const { for (const auto & hash_join : hash_joins) { + std::lock_guard lokc(hash_join->mutex); if (!hash_join->data->alwaysReturnsEmptySet()) return false; } @@ -226,25 +229,8 @@ void ConcurrentHashJoin::dispatchBlock(BlockDispatchControlData & dispatch_data, } if (selector_column.column->isNullable()) { - const auto * nullable_col = typeid_cast(selector_column.column.get()); - const auto & nested_col = nullable_col->getNestedColumnPtr(); - size_t last_offset = 0; - MutableColumnPtr dst = nullable_col->cloneEmpty(); - for (size_t i = 0, sz = selector_column.column->size(); i < sz; ++i) - { - if (selector_column.column->isNullAt(i))[[unlikely]] - { - if (i > last_offset)[[likely]] - dst->insertRangeFrom(*nested_col, last_offset, i - last_offset); - dst->insertDefault(); - last_offset = i + 1; - } - } - if (last_offset < selector_column.column->size()) - { - dst->insertRangeFrom(*nested_col, last_offset, selector_column.column->size() - last_offset); - } - selector_column.column = std::move(dst); + // use the default value for null rows. + selector_column.column = typeid_cast(selector_column.column.get())->getNestedColumnPtr(); } auto selector = createBlockSelector(*selector_column.column, selector_slots); From 9fe049de51cafee808eaa4ee7ef4225755e6aad0 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 22 Apr 2022 14:22:57 +0800 Subject: [PATCH 036/106] update test case 01383_remote_ambiguous_column_shard --- .../queries/0_stateless/01383_remote_ambiguous_column_shard.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01383_remote_ambiguous_column_shard.sql b/tests/queries/0_stateless/01383_remote_ambiguous_column_shard.sql index de552b7b27f..b4c018cacdc 100644 --- a/tests/queries/0_stateless/01383_remote_ambiguous_column_shard.sql +++ b/tests/queries/0_stateless/01383_remote_ambiguous_column_shard.sql @@ -8,6 +8,6 @@ create table test_01383.dimension (id1 Int64, name String) ENGINE = MergeTree() insert into test_01383.fact values (1,2,10),(2,2,10),(3,3,10),(4,3,10); insert into test_01383.dimension values (1,'name_1'),(2,'name_1'),(3,'name_3'),(4, 'name_4'); -SELECT f.id1 AS ID, d.name AS Name, sum(f.value) FROM remote('127.0.0.{1,2,3}', test_01383.fact) AS f LEFT JOIN test_01383.dimension AS d ON f.id1 = d.id1 WHERE f.id1 = f.id2 GROUP BY ID, Name; +SELECT f.id1 AS ID, d.name AS Name, sum(f.value) FROM remote('127.0.0.{1,2,3}', test_01383.fact) AS f LEFT JOIN test_01383.dimension AS d ON f.id1 = d.id1 WHERE f.id1 = f.id2 GROUP BY ID, Name ORDER BY ID; DROP DATABASE test_01383; From b6ed16e360df23509ac39e8be47e413b890a4283 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 22 Apr 2022 18:27:54 +0800 Subject: [PATCH 037/106] update test case tests/queries/0_stateless/01049_join_low_card_bug_long.reference --- .../01049_join_low_card_bug_long.reference | 202 ++++++++++++++++++ .../01049_join_low_card_bug_long.sql.j2 | 2 +- 2 files changed, 203 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01049_join_low_card_bug_long.reference b/tests/queries/0_stateless/01049_join_low_card_bug_long.reference index c5d02a66ba5..6587fab28d2 100644 --- a/tests/queries/0_stateless/01049_join_low_card_bug_long.reference +++ b/tests/queries/0_stateless/01049_join_low_card_bug_long.reference @@ -402,6 +402,208 @@ LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N Nullable LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str Nullable(String) Nullable(String) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N Nullable(String) Nullable(String) str_l str_l LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_r str_r Nullable(String) Nullable(String) \N \N +-- join_algorithm = parallel_hash, join_use_nulls = 0 -- +str LowCardinality(String) + LowCardinality(String) +str LowCardinality(String) +str_r LowCardinality(String) +str LowCardinality(String) + LowCardinality(String) +str_l LowCardinality(String) +str LowCardinality(String) +str_r LowCardinality(String) +str_l LowCardinality(String) +LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) +LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) +LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) +LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str_l str_l +LowCardinality(String) LowCardinality(String) str str LowCardinality(String) LowCardinality(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(String) LowCardinality(String) +LowCardinality(String) LowCardinality(String) LowCardinality(String) LowCardinality(String) str_l str_l +str LowCardinality(String) + LowCardinality(String) +str LowCardinality(String) +str_r LowCardinality(String) +str LowCardinality(String) + LowCardinality(String) +str_l LowCardinality(String) +str LowCardinality(String) +str_r LowCardinality(String) +str_l LowCardinality(String) +String String str str LowCardinality(String) LowCardinality(String) str str +String String str_r str_r LowCardinality(String) LowCardinality(String) +String String str str LowCardinality(String) LowCardinality(String) str str +String String str_r str_r LowCardinality(String) LowCardinality(String) +String String str str LowCardinality(String) LowCardinality(String) str str +String String str_r str_r LowCardinality(String) LowCardinality(String) +String String LowCardinality(String) LowCardinality(String) str_l str_l +String String str str LowCardinality(String) LowCardinality(String) str str +String String str_r str_r LowCardinality(String) LowCardinality(String) +String String LowCardinality(String) LowCardinality(String) str_l str_l +str String + String +str String +str_r String +str String + String +str_l String +str String +str_r String +str_l String +LowCardinality(String) LowCardinality(String) str str String String str str +LowCardinality(String) LowCardinality(String) str_r str_r String String +LowCardinality(String) LowCardinality(String) str str String String str str +LowCardinality(String) LowCardinality(String) str_r str_r String String +LowCardinality(String) LowCardinality(String) str str String String str str +LowCardinality(String) LowCardinality(String) str_r str_r String String +LowCardinality(String) LowCardinality(String) String String str_l str_l +LowCardinality(String) LowCardinality(String) str str String String str str +LowCardinality(String) LowCardinality(String) str_r str_r String String +LowCardinality(String) LowCardinality(String) String String str_l str_l +str LowCardinality(String) + LowCardinality(String) +str LowCardinality(String) +str_r LowCardinality(String) +str LowCardinality(String) + LowCardinality(String) +str_l LowCardinality(String) +str LowCardinality(String) +str_r LowCardinality(String) +str_l LowCardinality(String) +Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) +Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) +Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) +Nullable(String) Nullable(String) \N \N LowCardinality(String) LowCardinality(String) str_l str_l +Nullable(String) Nullable(String) str str LowCardinality(String) LowCardinality(String) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(String) LowCardinality(String) +Nullable(String) Nullable(String) \N \N LowCardinality(String) LowCardinality(String) str_l str_l +str Nullable(String) +\N Nullable(String) +str Nullable(String) +str_r Nullable(String) +str Nullable(String) +\N Nullable(String) +str_l Nullable(String) +str Nullable(String) +str_l Nullable(String) +str_r Nullable(String) +LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r Nullable(String) Nullable(String) \N \N +LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r Nullable(String) Nullable(String) \N \N +LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r Nullable(String) Nullable(String) \N \N +LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) str_l str_l +LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str str +LowCardinality(String) LowCardinality(String) Nullable(String) Nullable(String) str_l str_l +LowCardinality(String) LowCardinality(String) str_r str_r Nullable(String) Nullable(String) \N \N +-- join_algorithm = parallel_hash, join_use_nulls = 1 -- +str LowCardinality(Nullable(String)) +\N LowCardinality(Nullable(String)) +str LowCardinality(Nullable(String)) +str_r LowCardinality(Nullable(String)) +str LowCardinality(Nullable(String)) +\N LowCardinality(Nullable(String)) +str_l LowCardinality(Nullable(String)) +str LowCardinality(Nullable(String)) +str_l LowCardinality(Nullable(String)) +str_r LowCardinality(Nullable(String)) +LowCardinality(String) LowCardinality(String) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +LowCardinality(String) LowCardinality(String) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +LowCardinality(String) LowCardinality(String) str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_l str_l +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_l str_l +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +str LowCardinality(Nullable(String)) +\N LowCardinality(Nullable(String)) +str LowCardinality(Nullable(String)) +str_r LowCardinality(Nullable(String)) +str LowCardinality(Nullable(String)) +\N LowCardinality(Nullable(String)) +str_l LowCardinality(Nullable(String)) +str LowCardinality(Nullable(String)) +str_l LowCardinality(Nullable(String)) +str_r LowCardinality(Nullable(String)) +String String str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +String String str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +String String str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +String String str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +Nullable(String) Nullable(String) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +Nullable(String) Nullable(String) \N \N LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_l str_l +Nullable(String) Nullable(String) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +Nullable(String) Nullable(String) \N \N LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_l str_l +Nullable(String) Nullable(String) str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +str Nullable(String) +\N Nullable(String) +str Nullable(String) +str_r Nullable(String) +str Nullable(String) +\N Nullable(String) +str_l Nullable(String) +str Nullable(String) +str_l Nullable(String) +str_r Nullable(String) +LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r Nullable(String) Nullable(String) \N \N +LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r Nullable(String) Nullable(String) \N \N +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str Nullable(String) Nullable(String) str str +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_r str_r Nullable(String) Nullable(String) \N \N +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N Nullable(String) Nullable(String) str_l str_l +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str Nullable(String) Nullable(String) str str +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N Nullable(String) Nullable(String) str_l str_l +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_r str_r Nullable(String) Nullable(String) \N \N +str LowCardinality(Nullable(String)) +\N LowCardinality(Nullable(String)) +str LowCardinality(Nullable(String)) +str_r LowCardinality(Nullable(String)) +str LowCardinality(Nullable(String)) +\N LowCardinality(Nullable(String)) +str_l LowCardinality(Nullable(String)) +str LowCardinality(Nullable(String)) +str_l LowCardinality(Nullable(String)) +str_r LowCardinality(Nullable(String)) +Nullable(String) Nullable(String) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +Nullable(String) Nullable(String) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +Nullable(String) Nullable(String) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +Nullable(String) Nullable(String) str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +Nullable(String) Nullable(String) \N \N LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_l str_l +Nullable(String) Nullable(String) str str LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str +Nullable(String) Nullable(String) \N \N LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_l str_l +Nullable(String) Nullable(String) str_r str_r LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N +str Nullable(String) +\N Nullable(String) +str Nullable(String) +str_r Nullable(String) +str Nullable(String) +\N Nullable(String) +str_l Nullable(String) +str Nullable(String) +str_l Nullable(String) +str_r Nullable(String) +LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r Nullable(String) Nullable(String) \N \N +LowCardinality(String) LowCardinality(String) str str Nullable(String) Nullable(String) str str +LowCardinality(String) LowCardinality(String) str_r str_r Nullable(String) Nullable(String) \N \N +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str Nullable(String) Nullable(String) str str +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_r str_r Nullable(String) Nullable(String) \N \N +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N Nullable(String) Nullable(String) str_l str_l +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str str Nullable(String) Nullable(String) str str +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) \N \N Nullable(String) Nullable(String) str_l str_l +LowCardinality(Nullable(String)) LowCardinality(Nullable(String)) str_r str_r Nullable(String) Nullable(String) \N \N -- LowCardinality(String) str LowCardinality(String) diff --git a/tests/queries/0_stateless/01049_join_low_card_bug_long.sql.j2 b/tests/queries/0_stateless/01049_join_low_card_bug_long.sql.j2 index 6328c16085b..9dd8f810b40 100644 --- a/tests/queries/0_stateless/01049_join_low_card_bug_long.sql.j2 +++ b/tests/queries/0_stateless/01049_join_low_card_bug_long.sql.j2 @@ -22,7 +22,7 @@ INSERT INTO l VALUES (0, 'str'), (2, 'str_l'); INSERT INTO nl VALUES (0, 'str'), (2, 'str_l'); INSERT INTO l_lc VALUES (0, 'str'), (2, 'str_l'); -{% for join_algorithm in [None, 'partial_merge'] -%} +{% for join_algorithm in [None, 'partial_merge', 'parallel_hash'] -%} {% for join_use_nulls in [0, 1] -%} SELECT '-- join_algorithm = {{ join_algorithm or 'default' }}, join_use_nulls = {{ join_use_nulls }} --'; From 056b4d1b61ac8e66f9b4bcce1a57399f2454ab19 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Sun, 24 Apr 2022 09:26:57 +0800 Subject: [PATCH 038/106] update --- src/Interpreters/ConcurrentHashJoin.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 84feae500b4..4b230affc08 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -83,9 +83,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) iter = pending_blocks.erase(iter); } else - { iter++; - } } } From a38b580c6f0d035be4a429358a6f86a0bb4fa579 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Sun, 24 Apr 2022 15:19:16 +0800 Subject: [PATCH 039/106] update --- src/Interpreters/ConcurrentHashJoin.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 4b230affc08..84feae500b4 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -83,7 +83,9 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) iter = pending_blocks.erase(iter); } else + { iter++; + } } } From 221bc9c9709cb5bbc852669abfe5a0ac24800bd5 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Sun, 24 Apr 2022 17:14:13 +0800 Subject: [PATCH 040/106] update --- src/Interpreters/ConcurrentHashJoin.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 84feae500b4..4b230affc08 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -83,9 +83,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) iter = pending_blocks.erase(iter); } else - { iter++; - } } } From c68806fbff4bd5b7b0b560905dd662047e31520e Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 25 Apr 2022 16:16:50 +0800 Subject: [PATCH 041/106] fixed typos --- src/Interpreters/ConcurrentHashJoin.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 4b230affc08..83137da1bf3 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -135,7 +135,7 @@ size_t ConcurrentHashJoin::getTotalRowCount() const size_t res = 0; for (const auto & hash_join : hash_joins) { - std::lock_guard lokc(hash_join->mutex); + std::lock_guard lock(hash_join->mutex); res += hash_join->data->getTotalRowCount(); } return res; @@ -146,7 +146,7 @@ size_t ConcurrentHashJoin::getTotalByteCount() const size_t res = 0; for (const auto & hash_join : hash_joins) { - std::lock_guard lokc(hash_join->mutex); + std::lock_guard lock(hash_join->mutex); res += hash_join->data->getTotalByteCount(); } return res; @@ -156,7 +156,7 @@ bool ConcurrentHashJoin::alwaysReturnsEmptySet() const { for (const auto & hash_join : hash_joins) { - std::lock_guard lokc(hash_join->mutex); + std::lock_guard lock(hash_join->mutex); if (!hash_join->data->alwaysReturnsEmptySet()) return false; } From 432ba871f8f64c32c292d8893d12602f3d578222 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 25 Apr 2022 18:02:10 +0800 Subject: [PATCH 042/106] update test case tests/queries/0_stateless/01925_join_materialized_columns --- tests/queries/0_stateless/01925_join_materialized_columns.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01925_join_materialized_columns.sql b/tests/queries/0_stateless/01925_join_materialized_columns.sql index 7c56d5fea39..271c230c35b 100644 --- a/tests/queries/0_stateless/01925_join_materialized_columns.sql +++ b/tests/queries/0_stateless/01925_join_materialized_columns.sql @@ -34,7 +34,7 @@ SELECT * FROM t1 ALL JOIN t2 ON t1.dt = t2.dt ORDER BY t1.time, t2.time; SELECT '-'; SELECT * FROM t1 ALL JOIN t2 USING (dt) ORDER BY t1.time, t2.time; SELECT '-'; -SELECT * FROM t1 JOIN t2 ON t1.dt1 = t2.dt2 ORDER BY t1.time, t2.time; +SELECT * FROM t1 JOIN t2 ON t1.dt1 = t2.dt2 ORDER BY t1.time, t1.dimension_1, t2.time, t2.dimension_2; SELECT '-'; SELECT * FROM t1 JOIN t2 ON t1.foo = t2.bar WHERE t2.aliascol2 == 'fact2t1_val2'; SELECT '-'; From c7841d226b6037bbd113ce9dc125d60f01c530c1 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 26 Apr 2022 11:29:18 +0800 Subject: [PATCH 043/106] udpate --- src/Interpreters/ConcurrentHashJoin.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 83137da1bf3..fee3616a697 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -83,7 +83,9 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) iter = pending_blocks.erase(iter); } else + { iter++; + } } } From 0b0fa8453bcfb1a1b9045813765242cd19aec851 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 26 Apr 2022 18:06:16 +0800 Subject: [PATCH 044/106] fixed bug: resize on left pipeline cause the order by result wrong --- src/Interpreters/InterpreterSelectQuery.cpp | 13 ++++++-- src/Processors/QueryPlan/JoinStep.cpp | 6 ++-- src/Processors/QueryPlan/JoinStep.h | 4 +-- .../QueryPlan/ResizeStreamsStep.cpp | 31 +++++++++++++++++++ src/Processors/QueryPlan/ResizeStreamsStep.h | 15 +++++++++ src/QueryPipeline/QueryPipelineBuilder.cpp | 9 ++---- src/QueryPipeline/QueryPipelineBuilder.h | 3 +- 7 files changed, 63 insertions(+), 18 deletions(-) create mode 100644 src/Processors/QueryPlan/ResizeStreamsStep.cpp create mode 100644 src/Processors/QueryPlan/ResizeStreamsStep.h diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 2874e9a8178..deaac32e275 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -1268,13 +1269,21 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

supportParallelJoin()) + { + joined_plan->addStep(std::make_unique(joined_plan->getCurrentDataStream(), max_streams)); + + } + // If optimize_read_in_order = true, do not change the left pipeline's stream size. + // otherwise will make the result wrong for order by. + if (!analysis_result.optimize_read_in_order) + query_plan.addStep(std::make_unique(query_plan.getCurrentDataStream(), max_streams)); QueryPlanStepPtr join_step = std::make_unique( query_plan.getCurrentDataStream(), joined_plan->getCurrentDataStream(), expressions.join, - settings.max_block_size, - max_streams); + settings.max_block_size); join_step->setStepDescription("JOIN"); std::vector plans; diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 0170c356459..494a2a6aa0e 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -15,11 +15,9 @@ JoinStep::JoinStep( const DataStream & left_stream_, const DataStream & right_stream_, JoinPtr join_, - size_t max_block_size_, - size_t max_streams_) + size_t max_block_size_) : join(std::move(join_)) , max_block_size(max_block_size_) - , max_streams(max_streams_) { input_streams = {left_stream_, right_stream_}; output_stream = DataStream @@ -33,7 +31,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines if (pipelines.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expect two input steps"); - return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors, max_streams); + return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors); } void JoinStep::describePipeline(FormatSettings & settings) const diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index 0ae1f78594b..71537f29a8e 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -16,8 +16,7 @@ public: const DataStream & left_stream_, const DataStream & right_stream_, JoinPtr join_, - size_t max_block_size_, - size_t max_streams_ = 0); + size_t max_block_size_); String getName() const override { return "Join"; } @@ -30,7 +29,6 @@ public: private: JoinPtr join; size_t max_block_size; - size_t max_streams; Processors processors; }; diff --git a/src/Processors/QueryPlan/ResizeStreamsStep.cpp b/src/Processors/QueryPlan/ResizeStreamsStep.cpp new file mode 100644 index 00000000000..580d11fa5f8 --- /dev/null +++ b/src/Processors/QueryPlan/ResizeStreamsStep.cpp @@ -0,0 +1,31 @@ +#include +#include +namespace DB +{ +static ITransformingStep::Traits getTraits() +{ + return ITransformingStep::Traits + { + { + .preserves_distinct_columns = false, + .returns_single_stream = false, + .preserves_number_of_streams = true, + .preserves_sorting = false, + }, + { + .preserves_number_of_rows = false, + } + }; +} + +ResizeStreamsStep::ResizeStreamsStep(const DataStream & input_stream_, size_t pipeline_streams_) + : ITransformingStep(input_stream_, input_stream_.header, getTraits()) + , pipeline_streams(pipeline_streams_) +{ +} + +void ResizeStreamsStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + pipeline.resize(pipeline_streams); +} +} diff --git a/src/Processors/QueryPlan/ResizeStreamsStep.h b/src/Processors/QueryPlan/ResizeStreamsStep.h new file mode 100644 index 00000000000..f26dab36671 --- /dev/null +++ b/src/Processors/QueryPlan/ResizeStreamsStep.h @@ -0,0 +1,15 @@ +#include +#include + +namespace DB +{ +class ResizeStreamsStep : public ITransformingStep +{ +public: + explicit ResizeStreamsStep(const DataStream & input_stream_, size_t pipeline_streams_); + String getName() const override { return "ResizeStreamsStep"; } + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; +private: + size_t pipeline_streams; +}; +} diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 7241548d013..5feed7c55ea 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -307,8 +307,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( std::unique_ptr right, JoinPtr join, size_t max_block_size, - Processors * collected_processors, - size_t max_streams) + Processors * collected_processors) { left->checkInitializedAndNotCompleted(); right->checkInitializedAndNotCompleted(); @@ -346,14 +345,10 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( /// ╞> FillingJoin ─> Resize ╣ ╞> Joining ─> (totals) /// (totals) ─────────┘ ╙─────┘ - // In some cases, left's streams is too smaller then max_streams. Keep it same as max_streams - // to make full use of cpu. - auto & num_streams = max_streams; - left->resize(num_streams); + auto num_streams = left->getNumStreams(); if (join->supportParallelJoin() && !right->hasTotals()) { - right->resize(num_streams); auto concurrent_right_filling_transform = [&](OutputPortRawPtrs outports) { Processors processors; diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 5f483b86c1c..ac84191cf34 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -101,8 +101,7 @@ public: std::unique_ptr right, JoinPtr join, size_t max_block_size, - Processors * collected_processors = nullptr, - size_t max_streams = 0); + Processors * collected_processors = nullptr); /// Add other pipeline and execute it before current one. /// Pipeline must have empty header, it should not generate any chunk. From d96c29810a6a53fdda417633d2b41f81db5e41dd Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 26 Apr 2022 18:12:14 +0800 Subject: [PATCH 045/106] fixed bug: resize on left pipeline cause the order by result wrong --- src/Interpreters/InterpreterSelectQuery.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index deaac32e275..3ad33ba652e 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1269,15 +1269,16 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

supportParallelJoin()) - { - joined_plan->addStep(std::make_unique(joined_plan->getCurrentDataStream(), max_streams)); - - } - // If optimize_read_in_order = true, do not change the left pipeline's stream size. - // otherwise will make the result wrong for order by. if (!analysis_result.optimize_read_in_order) + { + if (expressions.join->supportParallelJoin()) + { + joined_plan->addStep(std::make_unique(joined_plan->getCurrentDataStream(), max_streams)); + } + // If optimize_read_in_order = true, do not change the left pipeline's stream size. + // otherwise will make the result wrong for order by. query_plan.addStep(std::make_unique(query_plan.getCurrentDataStream(), max_streams)); + } QueryPlanStepPtr join_step = std::make_unique( query_plan.getCurrentDataStream(), From 20fc676bff5505203f1db8e5889608c7df1b6276 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 26 Apr 2022 18:27:42 +0800 Subject: [PATCH 046/106] fixed bug: resize on left pipeline cause the order by result wrong --- src/Interpreters/InterpreterSelectQuery.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 3ad33ba652e..cc9af95523c 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1269,12 +1269,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

supportParallelJoin()) + { + joined_plan->addStep(std::make_unique(joined_plan->getCurrentDataStream(), max_streams)); + } if (!analysis_result.optimize_read_in_order) { - if (expressions.join->supportParallelJoin()) - { - joined_plan->addStep(std::make_unique(joined_plan->getCurrentDataStream(), max_streams)); - } // If optimize_read_in_order = true, do not change the left pipeline's stream size. // otherwise will make the result wrong for order by. query_plan.addStep(std::make_unique(query_plan.getCurrentDataStream(), max_streams)); From 6cb7b7888fd89455068c1745de194af22794275c Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 26 Apr 2022 19:07:07 +0800 Subject: [PATCH 047/106] update test case 02236_explain_pipeline_join --- src/Processors/QueryPlan/ResizeStreamsStep.h | 1 + .../02236_explain_pipeline_join.reference | 24 +++++++++---------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/Processors/QueryPlan/ResizeStreamsStep.h b/src/Processors/QueryPlan/ResizeStreamsStep.h index f26dab36671..40cd654d3ab 100644 --- a/src/Processors/QueryPlan/ResizeStreamsStep.h +++ b/src/Processors/QueryPlan/ResizeStreamsStep.h @@ -1,3 +1,4 @@ +#pragma once #include #include diff --git a/tests/queries/0_stateless/02236_explain_pipeline_join.reference b/tests/queries/0_stateless/02236_explain_pipeline_join.reference index ad54708c9be..ed993e2a1e7 100644 --- a/tests/queries/0_stateless/02236_explain_pipeline_join.reference +++ b/tests/queries/0_stateless/02236_explain_pipeline_join.reference @@ -1,21 +1,19 @@ (Expression) -ExpressionTransform × 16 +ExpressionTransform (Join) - JoiningTransform × 16 2 → 1 - Resize 1 → 16 - (Expression) + JoiningTransform 2 → 1 + (Expression) + ExpressionTransform + (SettingQuotaAndLimits) + (Limit) + Limit + (ReadFromStorage) + Numbers 0 → 1 + (Expression) + FillingRightJoinSide ExpressionTransform (SettingQuotaAndLimits) (Limit) Limit (ReadFromStorage) Numbers 0 → 1 - (Expression) - Resize 1 → 16 - FillingRightJoinSide - ExpressionTransform - (SettingQuotaAndLimits) - (Limit) - Limit - (ReadFromStorage) - Numbers 0 → 1 From 0c0d4f14daa3d5dec53c545e6965c61250788304 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 27 Apr 2022 09:27:49 +0800 Subject: [PATCH 048/106] update test case 02236_explain_pipeline_join --- tests/queries/0_stateless/02236_explain_pipeline_join.sql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02236_explain_pipeline_join.sql b/tests/queries/0_stateless/02236_explain_pipeline_join.sql index 7a92d6bfde0..de885ed74ee 100644 --- a/tests/queries/0_stateless/02236_explain_pipeline_join.sql +++ b/tests/queries/0_stateless/02236_explain_pipeline_join.sql @@ -7,5 +7,4 @@ ALL LEFT JOIN ( SELECT * FROM system.numbers LIMIT 10 ) t2 -USING number -SETTINGS max_threads=16; +USING number; From 520b05b9f13930caadd553d49b85f1cc993898b7 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 27 Apr 2022 10:08:22 +0800 Subject: [PATCH 049/106] update test case tests/queries/0_stateless/02236_explain_pipeline_join.sql --- src/Interpreters/InterpreterSelectQuery.cpp | 15 ++------- src/Processors/QueryPlan/JoinStep.cpp | 9 +++--- src/Processors/QueryPlan/JoinStep.h | 6 +++- .../QueryPlan/ResizeStreamsStep.cpp | 31 ------------------- src/Processors/QueryPlan/ResizeStreamsStep.h | 16 ---------- src/QueryPipeline/QueryPipelineBuilder.cpp | 10 +++++- src/QueryPipeline/QueryPipelineBuilder.h | 4 ++- .../02236_explain_pipeline_join.reference | 24 +++++++------- .../02236_explain_pipeline_join.sql | 3 +- 9 files changed, 40 insertions(+), 78 deletions(-) delete mode 100644 src/Processors/QueryPlan/ResizeStreamsStep.cpp delete mode 100644 src/Processors/QueryPlan/ResizeStreamsStep.h diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index cc9af95523c..ddf5f21fe35 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -54,7 +54,6 @@ #include #include #include -#include #include #include #include @@ -1269,22 +1268,14 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

supportParallelJoin()) - { - joined_plan->addStep(std::make_unique(joined_plan->getCurrentDataStream(), max_streams)); - } - if (!analysis_result.optimize_read_in_order) - { - // If optimize_read_in_order = true, do not change the left pipeline's stream size. - // otherwise will make the result wrong for order by. - query_plan.addStep(std::make_unique(query_plan.getCurrentDataStream(), max_streams)); - } QueryPlanStepPtr join_step = std::make_unique( query_plan.getCurrentDataStream(), joined_plan->getCurrentDataStream(), expressions.join, - settings.max_block_size); + settings.max_block_size, + max_streams, + analysis_result.optimize_read_in_order); join_step->setStepDescription("JOIN"); std::vector plans; diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 494a2a6aa0e..334e5f7a08b 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -15,9 +15,10 @@ JoinStep::JoinStep( const DataStream & left_stream_, const DataStream & right_stream_, JoinPtr join_, - size_t max_block_size_) - : join(std::move(join_)) - , max_block_size(max_block_size_) + size_t max_block_size_, + size_t max_streams_, + bool keep_left_read_in_order_) + : join(std::move(join_)), max_block_size(max_block_size_), max_streams(max_streams_), keep_left_read_in_order(keep_left_read_in_order_) { input_streams = {left_stream_, right_stream_}; output_stream = DataStream @@ -31,7 +32,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines if (pipelines.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expect two input steps"); - return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors); + return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors, max_streams, keep_left_read_in_order); } void JoinStep::describePipeline(FormatSettings & settings) const diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index 71537f29a8e..b9d3dff1b65 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -16,7 +16,9 @@ public: const DataStream & left_stream_, const DataStream & right_stream_, JoinPtr join_, - size_t max_block_size_); + size_t max_block_size_, + size_t max_streams_, + bool keep_left_read_in_order_); String getName() const override { return "Join"; } @@ -29,6 +31,8 @@ public: private: JoinPtr join; size_t max_block_size; + size_t max_streams; + bool keep_left_read_in_order; Processors processors; }; diff --git a/src/Processors/QueryPlan/ResizeStreamsStep.cpp b/src/Processors/QueryPlan/ResizeStreamsStep.cpp deleted file mode 100644 index 580d11fa5f8..00000000000 --- a/src/Processors/QueryPlan/ResizeStreamsStep.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -namespace DB -{ -static ITransformingStep::Traits getTraits() -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = false, - .returns_single_stream = false, - .preserves_number_of_streams = true, - .preserves_sorting = false, - }, - { - .preserves_number_of_rows = false, - } - }; -} - -ResizeStreamsStep::ResizeStreamsStep(const DataStream & input_stream_, size_t pipeline_streams_) - : ITransformingStep(input_stream_, input_stream_.header, getTraits()) - , pipeline_streams(pipeline_streams_) -{ -} - -void ResizeStreamsStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - pipeline.resize(pipeline_streams); -} -} diff --git a/src/Processors/QueryPlan/ResizeStreamsStep.h b/src/Processors/QueryPlan/ResizeStreamsStep.h deleted file mode 100644 index 40cd654d3ab..00000000000 --- a/src/Processors/QueryPlan/ResizeStreamsStep.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include -#include - -namespace DB -{ -class ResizeStreamsStep : public ITransformingStep -{ -public: - explicit ResizeStreamsStep(const DataStream & input_stream_, size_t pipeline_streams_); - String getName() const override { return "ResizeStreamsStep"; } - void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; -private: - size_t pipeline_streams; -}; -} diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 5feed7c55ea..567ba602afd 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -307,7 +307,9 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( std::unique_ptr right, JoinPtr join, size_t max_block_size, - Processors * collected_processors) + Processors * collected_processors, + size_t max_streams, + bool keep_left_read_in_order) { left->checkInitializedAndNotCompleted(); right->checkInitializedAndNotCompleted(); @@ -346,9 +348,15 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( /// (totals) ─────────┘ ╙─────┘ auto num_streams = left->getNumStreams(); + if (!keep_left_read_in_order) + { + left->resize(max_streams); + num_streams = max_streams; + } if (join->supportParallelJoin() && !right->hasTotals()) { + right->resize(max_streams); auto concurrent_right_filling_transform = [&](OutputPortRawPtrs outports) { Processors processors; diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index ac84191cf34..6846a0266d4 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -101,7 +101,9 @@ public: std::unique_ptr right, JoinPtr join, size_t max_block_size, - Processors * collected_processors = nullptr); + Processors * collected_processors = nullptr, + size_t max_streams = 0, + bool keep_left_read_in_order = true); /// Add other pipeline and execute it before current one. /// Pipeline must have empty header, it should not generate any chunk. diff --git a/tests/queries/0_stateless/02236_explain_pipeline_join.reference b/tests/queries/0_stateless/02236_explain_pipeline_join.reference index ed993e2a1e7..ad54708c9be 100644 --- a/tests/queries/0_stateless/02236_explain_pipeline_join.reference +++ b/tests/queries/0_stateless/02236_explain_pipeline_join.reference @@ -1,19 +1,21 @@ (Expression) -ExpressionTransform +ExpressionTransform × 16 (Join) - JoiningTransform 2 → 1 - (Expression) - ExpressionTransform - (SettingQuotaAndLimits) - (Limit) - Limit - (ReadFromStorage) - Numbers 0 → 1 - (Expression) - FillingRightJoinSide + JoiningTransform × 16 2 → 1 + Resize 1 → 16 + (Expression) ExpressionTransform (SettingQuotaAndLimits) (Limit) Limit (ReadFromStorage) Numbers 0 → 1 + (Expression) + Resize 1 → 16 + FillingRightJoinSide + ExpressionTransform + (SettingQuotaAndLimits) + (Limit) + Limit + (ReadFromStorage) + Numbers 0 → 1 diff --git a/tests/queries/0_stateless/02236_explain_pipeline_join.sql b/tests/queries/0_stateless/02236_explain_pipeline_join.sql index de885ed74ee..7a92d6bfde0 100644 --- a/tests/queries/0_stateless/02236_explain_pipeline_join.sql +++ b/tests/queries/0_stateless/02236_explain_pipeline_join.sql @@ -7,4 +7,5 @@ ALL LEFT JOIN ( SELECT * FROM system.numbers LIMIT 10 ) t2 -USING number; +USING number +SETTINGS max_threads=16; From 5738871a8b197bed3577c69a3856e19169ca35cd Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 27 Apr 2022 10:24:19 +0800 Subject: [PATCH 050/106] update QueryPipelineBuilder::joinPipelines --- src/Processors/QueryPlan/JoinStep.cpp | 2 +- src/QueryPipeline/QueryPipelineBuilder.cpp | 4 ++-- src/QueryPipeline/QueryPipelineBuilder.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 334e5f7a08b..983be9d45fb 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -32,7 +32,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines if (pipelines.size() != 2) throw Exception(ErrorCodes::LOGICAL_ERROR, "JoinStep expect two input steps"); - return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, &processors, max_streams, keep_left_read_in_order); + return QueryPipelineBuilder::joinPipelines(std::move(pipelines[0]), std::move(pipelines[1]), join, max_block_size, max_streams, keep_left_read_in_order, &processors); } void JoinStep::describePipeline(FormatSettings & settings) const diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 567ba602afd..6c441d804c4 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -307,9 +307,9 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( std::unique_ptr right, JoinPtr join, size_t max_block_size, - Processors * collected_processors, size_t max_streams, - bool keep_left_read_in_order) + bool keep_left_read_in_order, + Processors * collected_processors) { left->checkInitializedAndNotCompleted(); right->checkInitializedAndNotCompleted(); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 6846a0266d4..ad25985ab48 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -101,9 +101,9 @@ public: std::unique_ptr right, JoinPtr join, size_t max_block_size, - Processors * collected_processors = nullptr, - size_t max_streams = 0, - bool keep_left_read_in_order = true); + size_t max_streams, + bool keep_left_read_in_order, + Processors * collected_processors = nullptr); /// Add other pipeline and execute it before current one. /// Pipeline must have empty header, it should not generate any chunk. From 7505ce426a4cab8d9212b4fba144d9c046ddddcd Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 27 Apr 2022 13:11:24 +0800 Subject: [PATCH 051/106] update --- src/Interpreters/ConcurrentHashJoin.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index fee3616a697..83137da1bf3 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -83,9 +83,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) iter = pending_blocks.erase(iter); } else - { iter++; - } } } From 10b7e08319e10e95d4765a1844cec7fb2683f74c Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 27 Apr 2022 16:41:34 +0800 Subject: [PATCH 052/106] update --- src/Interpreters/ConcurrentHashJoin.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 83137da1bf3..fee3616a697 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -83,7 +83,9 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) iter = pending_blocks.erase(iter); } else + { iter++; + } } } From 1fc51e09ff7e2ceb37f05f37fef97d4ef30d64ff Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 28 Apr 2022 18:51:13 +0000 Subject: [PATCH 053/106] fix insertion to column of type Object from multiple files via table function --- src/Columns/ColumnObject.cpp | 50 ++++++++++++++++++- src/Columns/ColumnObject.h | 11 ++-- src/DataTypes/ObjectUtils.cpp | 3 ++ src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Processors/Formats/IRowInputFormat.cpp | 1 - .../01825_type_json_multiple_files.reference | 14 ++++++ .../01825_type_json_multiple_files.sh | 42 ++++++++++++++++ 7 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/01825_type_json_multiple_files.reference create mode 100755 tests/queries/0_stateless/01825_type_json_multiple_files.sh diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 64c7a84c263..f937cc8777f 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -334,7 +334,6 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length) { assert(src.isFinalized()); - const auto & src_column = src.data.back(); const auto & src_type = src.least_common_type.get(); @@ -646,10 +645,19 @@ void ColumnObject::get(size_t n, Field & res) const } } +void ColumnObject::insertFrom(const IColumn & src, size_t n) +{ + insert(src[n]); + finalize(); +} + void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) { const auto & src_object = assert_cast(src); + if (!src_object.isFinalized()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insertRangeFrom non-finalized ColumnObject"); + for (auto & entry : subcolumns) { if (src_object.hasSubcolumn(entry->path)) @@ -658,6 +666,16 @@ void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t len entry->data.insertManyDefaults(length); } + for (const auto & entry : src_object.subcolumns) + { + if (!hasSubcolumn(entry->path)) + { + addSubcolumn(entry->path, num_rows); + auto & subcolumn = getSubcolumn(entry->path); + subcolumn.insertRangeFrom(entry->data, start, length); + } + } + num_rows += length; finalize(); } @@ -685,6 +703,36 @@ void ColumnObject::popBack(size_t length) num_rows -= length; } +template +ColumnPtr ColumnObject::applyForSubcolumns(Func && func, std::string_view func_name) const +{ + if (!isFinalized()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot {} non-finalized ColumnObject", func_name); + + auto res = ColumnObject::create(is_nullable); + for (const auto & subcolumn : subcolumns) + { + auto new_subcolumn = func(subcolumn->data.getFinalizedColumn()); + res->addSubcolumn(subcolumn->path, new_subcolumn->assumeMutable()); + } + return res; +} + +ColumnPtr ColumnObject::permute(const Permutation & perm, size_t limit) const +{ + return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.permute(perm, limit); }, "permute"); +} + +ColumnPtr ColumnObject::filter(const Filter & filter, ssize_t result_size_hint) const +{ + return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.filter(filter, result_size_hint); }, "filter"); +} + +ColumnPtr ColumnObject::index(const IColumn & indexes, size_t limit) const +{ + return applyForSubcolumns([&](const auto & subcolumn) { return subcolumn.index(indexes, limit); }, "index"); +} + const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const { if (const auto * node = subcolumns.findLeaf(key)) diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 9d61b165042..28b464bc63b 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -193,15 +193,18 @@ public: void forEachSubcolumn(ColumnCallback callback) override; void insert(const Field & field) override; void insertDefault() override; + void insertFrom(const IColumn & src, size_t n) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr replicate(const Offsets & offsets) const override; void popBack(size_t length) override; Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + ColumnPtr permute(const Permutation & perm, size_t limit) const override; + ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override; + ColumnPtr index(const IColumn & indexes, size_t limit) const override; /// All other methods throw exception. - ColumnPtr decompress() const override { throwMustBeConcrete(); } StringRef getDataAt(size_t) const override { throwMustBeConcrete(); } bool isDefaultAt(size_t) const override { throwMustBeConcrete(); } void insertData(const char *, size_t) override { throwMustBeConcrete(); } @@ -211,10 +214,7 @@ public: void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } - ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeConcrete(); } void expand(const Filter &, bool) override { throwMustBeConcrete(); } - ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeConcrete(); } - ColumnPtr index(const IColumn &, size_t) const override { throwMustBeConcrete(); } int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeConcrete(); } void compareColumn(const IColumn &, size_t, PaddedPODArray *, PaddedPODArray &, int, int) const override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } @@ -232,6 +232,9 @@ private: { throw Exception("ColumnObject must be converted to ColumnTuple before use", ErrorCodes::LOGICAL_ERROR); } + + template + ColumnPtr applyForSubcolumns(Func && func, std::string_view func_name) const; }; } diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 044e03afd10..0d8dea4315d 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -107,6 +107,9 @@ DataTypePtr getDataTypeByColumn(const IColumn & column) if (WhichDataType(idx).isSimple()) return DataTypeFactory::instance().get(String(magic_enum::enum_name(idx))); + if (WhichDataType(idx).isNothing()) + return std::make_shared(); + if (const auto * column_array = checkAndGetColumn(&column)) return std::make_shared(getDataTypeByColumn(column_array->getData())); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index b0aaac6e745..8408b0ac5fc 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -438,7 +438,7 @@ BlockIO InterpreterInsertQuery::execute() }); /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it may have different settings for applying Sparse serialization. + /// may not support it or may have different settings for applying Sparse serialization. pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { return std::make_shared(in_header); diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index f29f96bbb3b..9896f95bb54 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -213,7 +213,6 @@ Chunk IRowInputFormat::generate() finalizeObjectColumns(columns); Chunk chunk(std::move(columns), num_rows); - //chunk.setChunkInfo(std::move(chunk_missing_values)); return chunk; } diff --git a/tests/queries/0_stateless/01825_type_json_multiple_files.reference b/tests/queries/0_stateless/01825_type_json_multiple_files.reference new file mode 100644 index 00000000000..b887abc8590 --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_multiple_files.reference @@ -0,0 +1,14 @@ +{"data":{"k0":100,"k1":0,"k2":0,"k3":0,"k4":0,"k5":0}} +{"data":{"k0":0,"k1":100,"k2":0,"k3":0,"k4":0,"k5":0}} +{"data":{"k0":0,"k1":0,"k2":100,"k3":0,"k4":0,"k5":0}} +{"data":{"k0":0,"k1":0,"k2":0,"k3":100,"k4":0,"k5":0}} +{"data":{"k0":0,"k1":0,"k2":0,"k3":0,"k4":100,"k5":0}} +{"data":{"k0":0,"k1":0,"k2":0,"k3":0,"k4":0,"k5":100}} +Tuple(k0 Int8, k1 Int8, k2 Int8, k3 Int8, k4 Int8, k5 Int8) +{"data":{"k0":100,"k1":0,"k2":0}} +{"data":{"k0":0,"k1":100,"k2":0}} +{"data":{"k0":0,"k1":0,"k2":100}} +Tuple(k0 Int8, k1 Int8, k2 Int8) +{"data":{"k1":100,"k3":0}} +{"data":{"k1":0,"k3":100}} +Tuple(k1 Int8, k3 Int8) diff --git a/tests/queries/0_stateless/01825_type_json_multiple_files.sh b/tests/queries/0_stateless/01825_type_json_multiple_files.sh new file mode 100755 index 00000000000..2fde6aa2a5f --- /dev/null +++ b/tests/queries/0_stateless/01825_type_json_multiple_files.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') +[ -e "$user_files_path"/01825_file_*.json ] && rm "$user_files_path"/01825_file_*.json + +for i in {0..5}; do + echo "{\"k$i\": 100}" > "$user_files_path"/01825_file_$i.json +done + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_files (file String, data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 + +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON')" + +${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 +${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" + +${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" + +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ + SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ + ORDER BY _file LIMIT 3" + +${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 +${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" + +${CLICKHOUSE_CLIENT} -q "TRUNCATE TABLE IF EXISTS t_json_files" + +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_files \ + SELECT _file, data FROM file('01825_file_*.json', 'JSONAsObject', 'data JSON') \ + WHERE _file IN ('01825_file_1.json', '01825_file_3.json')" + +${CLICKHOUSE_CLIENT} -q "SELECT data FROM t_json_files ORDER BY file FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 +${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(data) FROM t_json_files LIMIT 1" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_files" +rm "$user_files_path"/01825_file_*.json From 5a6b802aa8dd0d3408ca065cb73beebb6250780a Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 29 Apr 2022 09:29:04 +0800 Subject: [PATCH 054/106] update --- src/QueryPipeline/QueryPipelineBuilder.cpp | 40 ++++++++++++---------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 6c441d804c4..14457918f98 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -348,28 +348,32 @@ std::unique_ptr QueryPipelineBuilder::joinPipelines( /// (totals) ─────────┘ ╙─────┘ auto num_streams = left->getNumStreams(); - if (!keep_left_read_in_order) - { - left->resize(max_streams); - num_streams = max_streams; - } - if (join->supportParallelJoin() && !right->hasTotals()) + if (join->supportParallelJoin()) { - right->resize(max_streams); - auto concurrent_right_filling_transform = [&](OutputPortRawPtrs outports) + if (!keep_left_read_in_order) { - Processors processors; - for (auto & outport : outports) + left->resize(max_streams); + num_streams = max_streams; + } + + if (!right->hasTotals()) + { + right->resize(max_streams); + auto concurrent_right_filling_transform = [&](OutputPortRawPtrs outports) { - auto adding_joined = std::make_shared(right->getHeader(), join); - connect(*outport, adding_joined->getInputs().front()); - processors.emplace_back(adding_joined); - } - return processors; - }; - right->transform(concurrent_right_filling_transform); - right->resize(1); + Processors processors; + for (auto & outport : outports) + { + auto adding_joined = std::make_shared(right->getHeader(), join); + connect(*outport, adding_joined->getInputs().front()); + processors.emplace_back(adding_joined); + } + return processors; + }; + right->transform(concurrent_right_filling_transform); + right->resize(1); + } } else { From fd750ceab867049adde79cfad4f2635b549d28d5 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 29 Apr 2022 15:50:32 +0800 Subject: [PATCH 055/106] update tests/queries/0_stateless/02236_explain_pipeline_join.reference --- .../02236_explain_pipeline_join.reference | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/02236_explain_pipeline_join.reference b/tests/queries/0_stateless/02236_explain_pipeline_join.reference index ad54708c9be..ed993e2a1e7 100644 --- a/tests/queries/0_stateless/02236_explain_pipeline_join.reference +++ b/tests/queries/0_stateless/02236_explain_pipeline_join.reference @@ -1,21 +1,19 @@ (Expression) -ExpressionTransform × 16 +ExpressionTransform (Join) - JoiningTransform × 16 2 → 1 - Resize 1 → 16 - (Expression) + JoiningTransform 2 → 1 + (Expression) + ExpressionTransform + (SettingQuotaAndLimits) + (Limit) + Limit + (ReadFromStorage) + Numbers 0 → 1 + (Expression) + FillingRightJoinSide ExpressionTransform (SettingQuotaAndLimits) (Limit) Limit (ReadFromStorage) Numbers 0 → 1 - (Expression) - Resize 1 → 16 - FillingRightJoinSide - ExpressionTransform - (SettingQuotaAndLimits) - (Limit) - Limit - (ReadFromStorage) - Numbers 0 → 1 From 5021cb707c6607967d6306c18a3354a8d9c8f614 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 29 Apr 2022 17:41:26 +0800 Subject: [PATCH 056/106] update --- src/Interpreters/ConcurrentHashJoin.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index fee3616a697..83137da1bf3 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -83,9 +83,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) iter = pending_blocks.erase(iter); } else - { iter++; - } } } From 243315b82659026025b0951f8fb1ce5fb2c4d710 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 29 Apr 2022 17:24:51 +0000 Subject: [PATCH 057/106] fix insert to Object columns --- src/Columns/ColumnObject.cpp | 20 +++++++++++++++++-- src/Columns/ColumnObject.h | 2 ++ .../01825_type_json_ghdata.reference | 1 + .../0_stateless/01825_type_json_ghdata.sh | 14 +++++++++++++ .../01825_type_json_insert_select.reference | 3 +++ .../01825_type_json_insert_select.sql | 14 +++++++++++++ .../01825_type_json_nbagames.reference | 1 + .../0_stateless/01825_type_json_nbagames.sh | 13 ++++++++++++ 8 files changed, 66 insertions(+), 2 deletions(-) diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index f937cc8777f..c862aa0c344 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -654,7 +654,6 @@ void ColumnObject::insertFrom(const IColumn & src, size_t n) void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length) { const auto & src_object = assert_cast(src); - if (!src_object.isFinalized()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insertRangeFrom non-finalized ColumnObject"); @@ -670,7 +669,24 @@ void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t len { if (!hasSubcolumn(entry->path)) { - addSubcolumn(entry->path, num_rows); + if (entry->path.hasNested()) + { + const auto & base_type = entry->data.getLeastCommonTypeBase(); + FieldInfo field_info + { + .scalar_type = base_type, + .have_nulls = base_type->isNullable(), + .need_convert = false, + .num_dimensions = entry->data.getNumberOfDimensions(), + }; + + addNestedSubcolumn(entry->path, field_info, num_rows); + } + else + { + addSubcolumn(entry->path, num_rows); + } + auto & subcolumn = getSubcolumn(entry->path); subcolumn.insertRangeFrom(entry->data, start, length); } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 28b464bc63b..73fc738cb8d 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -68,6 +68,8 @@ public: bool isFinalized() const; const DataTypePtr & getLeastCommonType() const { return least_common_type.get(); } + const DataTypePtr & getLeastCommonTypeBase() const { return least_common_type.getBase(); } + size_t getNumberOfDimensions() const { return least_common_type.getNumberOfDimensions(); } /// Checks the consistency of column's parts stored in @data. void checkTypes() const; diff --git a/tests/queries/0_stateless/01825_type_json_ghdata.reference b/tests/queries/0_stateless/01825_type_json_ghdata.reference index 3418121ad43..bd9fa2d50da 100644 --- a/tests/queries/0_stateless/01825_type_json_ghdata.reference +++ b/tests/queries/0_stateless/01825_type_json_ghdata.reference @@ -18,3 +18,4 @@ phanwi346 114 Nicholas Piggin 95 direwolf-github 49 2 +1 diff --git a/tests/queries/0_stateless/01825_type_json_ghdata.sh b/tests/queries/0_stateless/01825_type_json_ghdata.sh index 7486571cc22..f7f6cadee9e 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_type_json_ghdata.sh @@ -6,6 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_string" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_from_string" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 @@ -35,4 +37,16 @@ ${CLICKHOUSE_CLIENT} -q \ ${CLICKHOUSE_CLIENT} -q "SELECT max(data.payload.pull_request.assignees.size0) FROM ghdata" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_string (data String) ENGINE = MergeTree ORDER BY tuple()" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 + +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_string FORMAT JSONAsString" +${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_from_string SELECT data FROM ghdata_string" + +${CLICKHOUSE_CLIENT} -q "SELECT \ + (SELECT toTypeName(any(data)), sum(cityHash64(flattenTuple(data))) FROM ghdata_from_string) = \ + (SELECT toTypeName(any(data)), sum(cityHash64(flattenTuple(data))) FROM ghdata)" + ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_string" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_from_string" diff --git a/tests/queries/0_stateless/01825_type_json_insert_select.reference b/tests/queries/0_stateless/01825_type_json_insert_select.reference index 8283cc5af48..6e50983f5ee 100644 --- a/tests/queries/0_stateless/01825_type_json_insert_select.reference +++ b/tests/queries/0_stateless/01825_type_json_insert_select.reference @@ -10,3 +10,6 @@ Tuple(arr Nested(k11 Int8, k22 String, k33 Int8), k1 Int8, k2 String, k3 String) 3 ([],3,'','aaa') 4 ([(5,'6',0),(7,'0',8)],0,'','') 5 ([(0,'str1',0)],0,'','') +{"data":{"k1":1,"k10":[{"a":"1","b":"2","c":{"k11":""}},{"a":"2","b":"3","c":{"k11":""}}]}} +{"data":{"k1":2,"k10":[{"a":"1","b":"2","c":{"k11":"haha"}}]}} +Tuple(k1 Int8, k10 Nested(a String, b String, c Tuple(k11 String))) diff --git a/tests/queries/0_stateless/01825_type_json_insert_select.sql b/tests/queries/0_stateless/01825_type_json_insert_select.sql index 8bb03f84f5a..b80fa08382d 100644 --- a/tests/queries/0_stateless/01825_type_json_insert_select.sql +++ b/tests/queries/0_stateless/01825_type_json_insert_select.sql @@ -34,3 +34,17 @@ SELECT id, data FROM type_json_dst ORDER BY id; DROP TABLE type_json_src; DROP TABLE type_json_dst; + +CREATE TABLE type_json_dst (data JSON) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE type_json_src (data String) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO type_json_src FORMAT JSONAsString {"k1": 1, "k10": [{"a": "1", "b": "2"}, {"a": "2", "b": "3"}]}; +INSERT INTO type_json_src FORMAT JSONAsString {"k1": 2, "k10": [{"a": "1", "b": "2", "c": {"k11": "haha"}}]}; +INSERT INTO type_json_dst SELECT data FROM type_json_src; + +SET output_format_json_named_tuples_as_objects = 1; +SELECT * FROM type_json_dst ORDER BY data.k1 FORMAT JSONEachRow; +SELECT toTypeName(data) FROM type_json_dst LIMIT 1; + +DROP TABLE type_json_src; +DROP TABLE type_json_dst; diff --git a/tests/queries/0_stateless/01825_type_json_nbagames.reference b/tests/queries/0_stateless/01825_type_json_nbagames.reference index 8f86bfe613e..863f5a0db84 100644 --- a/tests/queries/0_stateless/01825_type_json_nbagames.reference +++ b/tests/queries/0_stateless/01825_type_json_nbagames.reference @@ -10,3 +10,4 @@ Clyde Drexler 4 Alvin Robertson 3 Magic Johnson 3 Charles Barkley 2 +1 diff --git a/tests/queries/0_stateless/01825_type_json_nbagames.sh b/tests/queries/0_stateless/01825_type_json_nbagames.sh index 18e7c050680..e13d004ac58 100755 --- a/tests/queries/0_stateless/01825_type_json_nbagames.sh +++ b/tests/queries/0_stateless/01825_type_json_nbagames.sh @@ -6,6 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames_string" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames_from_string" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE nbagames (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 @@ -36,5 +38,16 @@ ${CLICKHOUSE_CLIENT} -q \ ) \ GROUP BY player ORDER BY triple_doubles DESC, player LIMIT 5" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE nbagames_string (data String) ENGINE = MergeTree ORDER BY tuple()" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE nbagames_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 + +cat $CUR_DIR/data_json/nbagames_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nbagames_string FORMAT JSONAsString" +${CLICKHOUSE_CLIENT} -q "INSERT INTO nbagames_from_string SELECT data FROM nbagames_string" + +${CLICKHOUSE_CLIENT} -q "SELECT \ + (SELECT toTypeName(any(data)), sum(cityHash64(flattenTuple(data))) FROM nbagames_from_string) = \ + (SELECT toTypeName(any(data)), sum(cityHash64(flattenTuple(data))) FROM nbagames)" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames_string" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS nbagames_from_string" From 9878cae3e85eaf348e85e6627ef9f0c51055946f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 29 Apr 2022 18:48:19 +0000 Subject: [PATCH 058/106] allow to execute hash function with arguments of type Array(Tuple(..)) --- src/Functions/FunctionsHashing.h | 6 +++--- .../queries/0_stateless/02292_hash_array_tuples.reference | 6 ++++++ tests/queries/0_stateless/02292_hash_array_tuples.sql | 7 +++++++ 3 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02292_hash_array_tuples.reference create mode 100644 tests/queries/0_stateless/02292_hash_array_tuples.sql diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index b78ecb5c72a..750c247b518 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -987,7 +987,8 @@ private: const size_t nested_size = nested_column->size(); typename ColumnVector::Container vec_temp(nested_size); - executeAny(nested_type, nested_column, vec_temp); + bool nested_is_first = true; + executeForArgument(nested_type, nested_column, vec_temp, nested_is_first); const size_t size = offsets.size(); @@ -1058,8 +1059,7 @@ private: else if (which.isString()) executeString(icolumn, vec_to); else if (which.isFixedString()) executeString(icolumn, vec_to); else if (which.isArray()) executeArray(from_type, icolumn, vec_to); - else - executeGeneric(icolumn, vec_to); + else executeGeneric(icolumn, vec_to); } void executeForArgument(const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const diff --git a/tests/queries/0_stateless/02292_hash_array_tuples.reference b/tests/queries/0_stateless/02292_hash_array_tuples.reference new file mode 100644 index 00000000000..4e930173ee8 --- /dev/null +++ b/tests/queries/0_stateless/02292_hash_array_tuples.reference @@ -0,0 +1,6 @@ +14617701568871014978 +12913842429399915005 +8351543757058688770 +6020241010869553514 +12732328028874882204 +12371801021764949421 Array(Tuple(UInt8, Array(Tuple(UInt8, Tuple(UInt8, UInt8, Array(Tuple(UInt8, UInt8))))))) diff --git a/tests/queries/0_stateless/02292_hash_array_tuples.sql b/tests/queries/0_stateless/02292_hash_array_tuples.sql new file mode 100644 index 00000000000..fb9029aa288 --- /dev/null +++ b/tests/queries/0_stateless/02292_hash_array_tuples.sql @@ -0,0 +1,7 @@ +SELECT cityHash64([(1, 'a'), (2, 'b')]); +SELECT cityHash64([(1, 'c'), (2, 'b')]); +SELECT sipHash64([(1, 'a'), (2, 'b')]); +SELECT halfMD5([(1, 'a'), (2, 'b')]); +SELECT murmurHash2_64([(1, 'a'), (2, 'b'), (3, 'c')]); + +SELECT cityHash64([(1, [(1, (3, 4, [(5, 6), (7, 8)]))]), (2, [])] AS c), toTypeName(c); From db9f570594147a52f5870c4aa28467f8a05a939d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 29 Apr 2022 21:33:59 +0000 Subject: [PATCH 059/106] fix tests --- tests/queries/0_stateless/02292_hash_array_tuples.reference | 2 +- tests/queries/0_stateless/02292_hash_array_tuples.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02292_hash_array_tuples.reference b/tests/queries/0_stateless/02292_hash_array_tuples.reference index 4e930173ee8..a0f00bb55e6 100644 --- a/tests/queries/0_stateless/02292_hash_array_tuples.reference +++ b/tests/queries/0_stateless/02292_hash_array_tuples.reference @@ -1,6 +1,6 @@ 14617701568871014978 12913842429399915005 8351543757058688770 -6020241010869553514 +6955381966182986171 12732328028874882204 12371801021764949421 Array(Tuple(UInt8, Array(Tuple(UInt8, Tuple(UInt8, UInt8, Array(Tuple(UInt8, UInt8))))))) diff --git a/tests/queries/0_stateless/02292_hash_array_tuples.sql b/tests/queries/0_stateless/02292_hash_array_tuples.sql index fb9029aa288..919b2a0e6de 100644 --- a/tests/queries/0_stateless/02292_hash_array_tuples.sql +++ b/tests/queries/0_stateless/02292_hash_array_tuples.sql @@ -1,7 +1,7 @@ SELECT cityHash64([(1, 'a'), (2, 'b')]); SELECT cityHash64([(1, 'c'), (2, 'b')]); SELECT sipHash64([(1, 'a'), (2, 'b')]); -SELECT halfMD5([(1, 'a'), (2, 'b')]); +SELECT farmHash64([(1, 'a'), (2, 'b')]); SELECT murmurHash2_64([(1, 'a'), (2, 'b'), (3, 'c')]); SELECT cityHash64([(1, [(1, (3, 4, [(5, 6), (7, 8)]))]), (2, [])] AS c), toTypeName(c); From d6c0de0d406c9f01c9efbb8761248209fc3e54f1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 30 Apr 2022 12:29:23 +0200 Subject: [PATCH 060/106] Revert "Merge pull request #34055 from palegre-tiny/groupSortedArray" This reverts commit f055d7b692f977155f8c2abc3d3753a89ae0a7c7, reversing changes made to 4ec3c35e14b5b7ec4005429a1f962155166637ec. --- .../reference/grouparraysorted.md | 48 --- .../AggregateFunctionGroupArraySorted.cpp | 147 --------- .../AggregateFunctionGroupArraySorted.h | 310 ------------------ .../AggregateFunctionGroupArraySortedData.h | 162 --------- .../registerAggregateFunctions.cpp | 2 - tests/performance/group_array_sorted.xml | 11 - .../02158_grouparraysorted.reference | 18 - .../0_stateless/02158_grouparraysorted.sql | 43 --- 8 files changed, 741 deletions(-) delete mode 100644 docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md delete mode 100644 src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp delete mode 100644 src/AggregateFunctions/AggregateFunctionGroupArraySorted.h delete mode 100644 src/AggregateFunctions/AggregateFunctionGroupArraySortedData.h delete mode 100644 tests/performance/group_array_sorted.xml delete mode 100644 tests/queries/0_stateless/02158_grouparraysorted.reference delete mode 100644 tests/queries/0_stateless/02158_grouparraysorted.sql diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md deleted file mode 100644 index e34fcbc5788..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -sidebar_position: 108 ---- - -# groupArraySorted {#groupArraySorted} - -Returns an array with the first N items in ascending order. - -``` sql -groupArraySorted(N)(column) -``` - -**Arguments** - -- `N` – The number of elements to return. - -If the parameter is omitted, default value 10 is used. - -**Arguments** - -- `column` – The value. -- `expr` — Optional. The field or expresion to sort by. If not set values are sorted by themselves. - -**Example** - -Gets the first 10 numbers: - -``` sql -SELECT groupArraySorted(10)(number) FROM numbers(100) -``` - -``` text -┌─groupArraySorted(10)(number)─┐ -│ [0,1,2,3,4,5,6,7,8,9] │ -└──────────────────────────────┘ -``` - -Or the last 10: - -``` sql -SELECT groupArraySorted(10)(number, -number) FROM numbers(100) -``` - -``` text -┌─groupArraySorted(10)(number, negate(number))─┐ -│ [99,98,97,96,95,94,93,92,91,90] │ -└──────────────────────────────────────────────┘ -``` \ No newline at end of file diff --git a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp deleted file mode 100644 index 50d5f075322..00000000000 --- a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp +++ /dev/null @@ -1,147 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - - -static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF; -static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10; - - -namespace DB -{ -struct Settings; - -namespace ErrorCodes -{ - extern const int ARGUMENT_OUT_OF_BOUND; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - - -namespace -{ - template - class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted - { - using AggregateFunctionGroupArraySorted::AggregateFunctionGroupArraySorted; - }; - - template - class AggregateFunctionGroupArraySortedFieldType - : public AggregateFunctionGroupArraySorted - { - using AggregateFunctionGroupArraySorted:: - AggregateFunctionGroupArraySorted; - DataTypePtr getReturnType() const override { return std::make_shared(std::make_shared()); } - }; - - template